1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993
31 *	The Regents of the University of California.  All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Rick Macklem at The University of Guelph.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 *    must display the following acknowledgement:
46 *	This product includes software developed by the University of
47 *	California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 *    may be used to endorse or promote products derived from this software
50 *    without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
65 * FreeBSD-Id: nfs_vnops.c,v 1.72 1997/11/07 09:20:48 phk Exp $
66 */
67
68
69/*
70 * vnode op calls for Sun NFS version 2 and 3
71 */
72#include <sys/param.h>
73#include <sys/kernel.h>
74#include <sys/systm.h>
75#include <sys/resourcevar.h>
76#include <sys/proc_internal.h>
77#include <sys/kauth.h>
78#include <sys/mount_internal.h>
79#include <sys/malloc.h>
80#include <sys/kpi_mbuf.h>
81#include <sys/conf.h>
82#include <sys/vnode_internal.h>
83#include <sys/dirent.h>
84#include <sys/fcntl.h>
85#include <sys/lockf.h>
86#include <sys/ubc_internal.h>
87#include <sys/attr.h>
88#include <sys/signalvar.h>
89#include <sys/uio_internal.h>
90
91#include <vfs/vfs_support.h>
92
93#include <sys/vm.h>
94
95#include <sys/time.h>
96#include <kern/clock.h>
97#include <libkern/OSAtomic.h>
98
99#include <miscfs/fifofs/fifo.h>
100#include <miscfs/specfs/specdev.h>
101
102#include <nfs/rpcv2.h>
103#include <nfs/nfsproto.h>
104#include <nfs/nfs.h>
105#include <nfs/nfsnode.h>
106#include <nfs/nfs_gss.h>
107#include <nfs/nfsmount.h>
108#include <nfs/nfs_lock.h>
109#include <nfs/xdr_subs.h>
110#include <nfs/nfsm_subs.h>
111
112#include <net/if.h>
113#include <netinet/in.h>
114#include <netinet/in_var.h>
115#include <vm/vm_kern.h>
116
117#include <kern/task.h>
118#include <kern/sched_prim.h>
119#include <libkern/OSAtomic.h>
120
/*
 * NFS vnode ops
 *
 * Forward declarations for the file-local (static) vnode operation
 * handlers that are referenced by the operation tables below.
 * The first group is used by both the "nfsv2" and "nfsv4" tables, the
 * nfsspec_ and nfsfifo_ handlers are used by the special device and FIFO
 * tables, and the nfs3_ group is referenced only from the "nfsv2" tables.
 * The FIFO handlers are only declared when FIFO support is configured.
 */
static int	nfs_vnop_lookup(struct vnop_lookup_args *);
static int	nfsspec_vnop_read(struct vnop_read_args *);
static int	nfsspec_vnop_write(struct vnop_write_args *);
static int	nfsspec_vnop_close(struct vnop_close_args *);
#if FIFO
static int	nfsfifo_vnop_read(struct vnop_read_args *);
static int	nfsfifo_vnop_write(struct vnop_write_args *);
static int	nfsfifo_vnop_close(struct vnop_close_args *);
#endif
static int	nfs_vnop_ioctl(struct vnop_ioctl_args *);
static int	nfs_vnop_select(struct vnop_select_args *);
static int	nfs_vnop_setattr(struct vnop_setattr_args *);
static int	nfs_vnop_read(struct vnop_read_args *);
static int	nfs_vnop_mmap(struct vnop_mmap_args *);
static int	nfs_vnop_fsync(struct vnop_fsync_args *);
static int	nfs_vnop_remove(struct vnop_remove_args *);
static int	nfs_vnop_rename(struct vnop_rename_args *);
static int	nfs_vnop_readdir(struct vnop_readdir_args *);
static int	nfs_vnop_readlink(struct vnop_readlink_args *);
static int	nfs_vnop_pathconf(struct vnop_pathconf_args *);
static int	nfs_vnop_pagein(struct vnop_pagein_args *);
static int	nfs_vnop_pageout(struct vnop_pageout_args *);
static int	nfs_vnop_blktooff(struct vnop_blktooff_args *);
static int	nfs_vnop_offtoblk(struct vnop_offtoblk_args *);
static int	nfs_vnop_blockmap(struct vnop_blockmap_args *);

/* handlers referenced only from the "nfsv2" operation tables */
static int	nfs3_vnop_create(struct vnop_create_args *);
static int	nfs3_vnop_mknod(struct vnop_mknod_args *);
static int	nfs3_vnop_getattr(struct vnop_getattr_args *);
static int	nfs3_vnop_link(struct vnop_link_args *);
static int	nfs3_vnop_mkdir(struct vnop_mkdir_args *);
static int	nfs3_vnop_rmdir(struct vnop_rmdir_args *);
static int	nfs3_vnop_symlink(struct vnop_symlink_args *);
157
/*
 * Operation table for the "nfsv2" vnode operations vector.
 *
 * create, mknod, open, close, getattr, link, mkdir, rmdir, symlink and
 * advlock are routed to the nfs3_* handlers; the remaining NFS operations
 * use the version-independent nfs_vnop_* handlers.  strategy, bwrite and
 * copyfile are wired to the err_* routines, and vnop_default_desc is
 * mapped to vn_default_error.
 *
 * NOTE(review): despite the "v2" name, the nfs3_* handlers suggest this
 * table serves both NFSv2 and NFSv3 mounts -- confirm where
 * nfsv2_vnodeop_p is attached to vnodes.
 */
vnop_t **nfsv2_vnodeop_p;
static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)nfs_vnop_lookup },	/* lookup */
	{ &vnop_create_desc, (vnop_t *)nfs3_vnop_create },	/* create */
	{ &vnop_mknod_desc, (vnop_t *)nfs3_vnop_mknod },	/* mknod */
	{ &vnop_open_desc, (vnop_t *)nfs3_vnop_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfs3_vnop_close },	/* close */
	{ &vnop_access_desc, (vnop_t *)nfs_vnop_access },	/* access */
	{ &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfs_vnop_read },		/* read */
	{ &vnop_write_desc, (vnop_t *)nfs_vnop_write },		/* write */
	{ &vnop_ioctl_desc, (vnop_t *)nfs_vnop_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)nfs_vnop_select },	/* select */
	{ &vnop_revoke_desc, (vnop_t *)nfs_vnop_revoke },	/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)nfs_vnop_mmap },		/* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)nfs_vnop_remove },	/* remove */
	{ &vnop_link_desc, (vnop_t *)nfs3_vnop_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)nfs_vnop_rename },	/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)nfs3_vnop_mkdir },	/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)nfs3_vnop_rmdir },	/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)nfs3_vnop_symlink },	/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)nfs_vnop_readdir },	/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)nfs_vnop_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)err_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)nfs_vnop_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)nfs3_vnop_advlock },	/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)err_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_copyfile_desc, (vnop_t *)err_copyfile },	/* Copyfile */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ NULL, NULL }	/* end-of-table marker */
};
/* pairs the vector pointer above with its table of entries */
struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
	{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
200
/*
 * Operation table for the "nfsv4" vnode operations vector.
 *
 * Same layout as the "nfsv2" table, except that create, mknod, open,
 * close, getattr, link, mkdir, rmdir, symlink and advlock are routed to
 * the nfs4_* handlers.  All other entries use the same nfs_vnop_* and
 * err_* handlers as the "nfsv2" table.
 */
vnop_t **nfsv4_vnodeop_p;
static struct vnodeopv_entry_desc nfsv4_vnodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)nfs_vnop_lookup },	/* lookup */
	{ &vnop_create_desc, (vnop_t *)nfs4_vnop_create },	/* create */
	{ &vnop_mknod_desc, (vnop_t *)nfs4_vnop_mknod },	/* mknod */
	{ &vnop_open_desc, (vnop_t *)nfs4_vnop_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfs4_vnop_close },	/* close */
	{ &vnop_access_desc, (vnop_t *)nfs_vnop_access },	/* access */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfs_vnop_read },		/* read */
	{ &vnop_write_desc, (vnop_t *)nfs_vnop_write },		/* write */
	{ &vnop_ioctl_desc, (vnop_t *)nfs_vnop_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)nfs_vnop_select },	/* select */
	{ &vnop_revoke_desc, (vnop_t *)nfs_vnop_revoke },	/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)nfs_vnop_mmap },		/* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)nfs_vnop_remove },	/* remove */
	{ &vnop_link_desc, (vnop_t *)nfs4_vnop_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)nfs_vnop_rename },	/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)nfs4_vnop_mkdir },	/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)nfs4_vnop_rmdir },	/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)nfs4_vnop_symlink },	/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)nfs_vnop_readdir },	/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)nfs_vnop_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)err_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)nfs_vnop_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)nfs4_vnop_advlock },	/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)err_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_copyfile_desc, (vnop_t *)err_copyfile },	/* Copyfile */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ NULL, NULL }	/* end-of-table marker */
};
/* pairs the vector pointer above with its table of entries */
struct vnodeopv_desc nfsv4_vnodeop_opv_desc =
	{ &nfsv4_vnodeop_p, nfsv4_vnodeop_entries };
243
/*
 * Special device vnode ops
 *
 * Operation table for special device vnodes, "nfsv2" flavor.
 * Most operations are handed to the spec_* routines.  The NFS code
 * supplies close, read and write (nfsspec_vnop_*), getattr
 * (nfs3_vnop_getattr), setattr, fsync, inactive, reclaim, pagein, pageout
 * and the blktooff/offtoblk/blockmap conversions.  bwrite goes to
 * vn_bwrite.  There is no access or copyfile entry in this table.
 */
vnop_t **spec_nfsv2nodeop_p;
static struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)spec_lookup },		/* lookup */
	{ &vnop_create_desc, (vnop_t *)spec_create },		/* create */
	{ &vnop_mknod_desc, (vnop_t *)spec_mknod },		/* mknod */
	{ &vnop_open_desc, (vnop_t *)spec_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfsspec_vnop_close },	/* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsspec_vnop_read },	/* read */
	{ &vnop_write_desc, (vnop_t *)nfsspec_vnop_write },	/* write */
	{ &vnop_ioctl_desc, (vnop_t *)spec_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)spec_select },		/* select */
	{ &vnop_revoke_desc, (vnop_t *)spec_revoke },		/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)spec_mmap },		/* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)spec_remove },		/* remove */
	{ &vnop_link_desc, (vnop_t *)spec_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)spec_rename },		/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)spec_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)spec_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)spec_symlink },		/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)spec_readdir },		/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)spec_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)spec_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)spec_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)spec_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ NULL, NULL }	/* end-of-table marker */
};
/* pairs the vector pointer above with its table of entries */
struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
	{ &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
/*
 * Operation table for special device vnodes, "nfsv4" flavor.
 * Identical to the "nfsv2" special device table except that getattr is
 * routed to nfs4_vnop_getattr.
 */
vnop_t **spec_nfsv4nodeop_p;
static struct vnodeopv_entry_desc spec_nfsv4nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)spec_lookup },		/* lookup */
	{ &vnop_create_desc, (vnop_t *)spec_create },		/* create */
	{ &vnop_mknod_desc, (vnop_t *)spec_mknod },		/* mknod */
	{ &vnop_open_desc, (vnop_t *)spec_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfsspec_vnop_close },	/* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsspec_vnop_read },	/* read */
	{ &vnop_write_desc, (vnop_t *)nfsspec_vnop_write },	/* write */
	{ &vnop_ioctl_desc, (vnop_t *)spec_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)spec_select },		/* select */
	{ &vnop_revoke_desc, (vnop_t *)spec_revoke },		/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)spec_mmap },		/* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)spec_remove },		/* remove */
	{ &vnop_link_desc, (vnop_t *)spec_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)spec_rename },		/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)spec_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)spec_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)spec_symlink },		/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)spec_readdir },		/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)spec_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)spec_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)spec_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)spec_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ NULL, NULL }	/* end-of-table marker */
};
/* pairs the vector pointer above with its table of entries */
struct vnodeopv_desc spec_nfsv4nodeop_opv_desc =
	{ &spec_nfsv4nodeop_p, spec_nfsv4nodeop_entries };
327
328#if FIFO
/*
 * Operation table for FIFO vnodes, "nfsv2" flavor (only built when FIFO
 * support is configured).
 * Most operations are handed to the fifo_* routines.  The NFS code
 * supplies close, read and write (nfsfifo_vnop_*), getattr
 * (nfs3_vnop_getattr), setattr, fsync, inactive, reclaim, pagein, pageout
 * and the blktooff/offtoblk/blockmap conversions.  bwrite goes to
 * vn_bwrite.  There is no access or copyfile entry in this table.
 */
vnop_t **fifo_nfsv2nodeop_p;
static struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)fifo_lookup },		/* lookup */
	{ &vnop_create_desc, (vnop_t *)fifo_create },		/* create */
	{ &vnop_mknod_desc, (vnop_t *)fifo_mknod },		/* mknod */
	{ &vnop_open_desc, (vnop_t *)fifo_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfsfifo_vnop_close },	/* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs3_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsfifo_vnop_read },	/* read */
	{ &vnop_write_desc, (vnop_t *)nfsfifo_vnop_write },	/* write */
	{ &vnop_ioctl_desc, (vnop_t *)fifo_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)fifo_select },		/* select */
	{ &vnop_revoke_desc, (vnop_t *)fifo_revoke },		/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)fifo_mmap },		/* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)fifo_remove },		/* remove */
	{ &vnop_link_desc, (vnop_t *)fifo_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)fifo_rename },		/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)fifo_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)fifo_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)fifo_symlink },		/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)fifo_readdir },		/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)fifo_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)fifo_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)fifo_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)fifo_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ NULL, NULL }	/* end-of-table marker */
};
/* pairs the vector pointer above with its table of entries */
struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
	{ &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };
369
/*
 * Operation table for FIFO vnodes, "nfsv4" flavor.
 * Identical to the "nfsv2" FIFO table except that getattr is routed to
 * nfs4_vnop_getattr.
 */
vnop_t **fifo_nfsv4nodeop_p;
static struct vnodeopv_entry_desc fifo_nfsv4nodeop_entries[] = {
	{ &vnop_default_desc, (vnop_t *)vn_default_error },
	{ &vnop_lookup_desc, (vnop_t *)fifo_lookup },		/* lookup */
	{ &vnop_create_desc, (vnop_t *)fifo_create },		/* create */
	{ &vnop_mknod_desc, (vnop_t *)fifo_mknod },		/* mknod */
	{ &vnop_open_desc, (vnop_t *)fifo_open },		/* open */
	{ &vnop_close_desc, (vnop_t *)nfsfifo_vnop_close },	/* close */
	{ &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr },	/* getattr */
	{ &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr },	/* setattr */
	{ &vnop_read_desc, (vnop_t *)nfsfifo_vnop_read },	/* read */
	{ &vnop_write_desc, (vnop_t *)nfsfifo_vnop_write },	/* write */
	{ &vnop_ioctl_desc, (vnop_t *)fifo_ioctl },		/* ioctl */
	{ &vnop_select_desc, (vnop_t *)fifo_select },		/* select */
	{ &vnop_revoke_desc, (vnop_t *)fifo_revoke },		/* revoke */
	{ &vnop_mmap_desc, (vnop_t *)fifo_mmap },		/* mmap */
	{ &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync },		/* fsync */
	{ &vnop_remove_desc, (vnop_t *)fifo_remove },		/* remove */
	{ &vnop_link_desc, (vnop_t *)fifo_link },		/* link */
	{ &vnop_rename_desc, (vnop_t *)fifo_rename },		/* rename */
	{ &vnop_mkdir_desc, (vnop_t *)fifo_mkdir },		/* mkdir */
	{ &vnop_rmdir_desc, (vnop_t *)fifo_rmdir },		/* rmdir */
	{ &vnop_symlink_desc, (vnop_t *)fifo_symlink },		/* symlink */
	{ &vnop_readdir_desc, (vnop_t *)fifo_readdir },		/* readdir */
	{ &vnop_readlink_desc, (vnop_t *)fifo_readlink },	/* readlink */
	{ &vnop_inactive_desc, (vnop_t *)nfs_vnop_inactive },	/* inactive */
	{ &vnop_reclaim_desc, (vnop_t *)nfs_vnop_reclaim },	/* reclaim */
	{ &vnop_strategy_desc, (vnop_t *)fifo_strategy },	/* strategy */
	{ &vnop_pathconf_desc, (vnop_t *)fifo_pathconf },	/* pathconf */
	{ &vnop_advlock_desc, (vnop_t *)fifo_advlock },		/* advlock */
	{ &vnop_bwrite_desc, (vnop_t *)vn_bwrite },		/* bwrite */
	{ &vnop_pagein_desc, (vnop_t *)nfs_vnop_pagein },	/* Pagein */
	{ &vnop_pageout_desc, (vnop_t *)nfs_vnop_pageout },	/* Pageout */
	{ &vnop_blktooff_desc, (vnop_t *)nfs_vnop_blktooff },	/* blktooff */
	{ &vnop_offtoblk_desc, (vnop_t *)nfs_vnop_offtoblk },	/* offtoblk */
	{ &vnop_blockmap_desc, (vnop_t *)nfs_vnop_blockmap },	/* blockmap */
	{ NULL, NULL }	/* end-of-table marker */
};
/* pairs the vector pointer above with its table of entries */
struct vnodeopv_desc fifo_nfsv4nodeop_opv_desc =
	{ &fifo_nfsv4nodeop_p, fifo_nfsv4nodeop_entries };
410#endif /* FIFO */
411
412
/* forward declaration of a file-local helper; its definition is not in this part of the file */
static int	nfs_sillyrename(nfsnode_t,nfsnode_t,struct componentname *,vfs_context_t);
414
415/*
416 * Find the slot in the access cache for this UID.
417 * If adding and no existing slot is found, reuse slots in FIFO order.
418 * The index of the next slot to use is kept in the last entry of the n_mode array.
419 */
420int
421nfs_node_mode_slot(nfsnode_t np, uid_t uid, int add)
422{
423	int slot;
424
425	for (slot=0; slot < NFS_ACCESS_CACHE_SIZE; slot++)
426		if (np->n_modeuid[slot] == uid)
427			break;
428	if (slot == NFS_ACCESS_CACHE_SIZE) {
429		if (!add)
430			return (-1);
431		slot = np->n_mode[NFS_ACCESS_CACHE_SIZE];
432		np->n_mode[NFS_ACCESS_CACHE_SIZE] = (slot + 1) % NFS_ACCESS_CACHE_SIZE;
433	}
434	return (slot);
435}
436
437int
438nfs3_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx)
439{
440	int error = 0, status, slot;
441	uint32_t access = 0;
442	u_int64_t xid;
443	struct nfsm_chain nmreq, nmrep;
444	struct timeval now;
445	uid_t uid;
446
447	nfsm_chain_null(&nmreq);
448	nfsm_chain_null(&nmrep);
449
450	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3) + NFSX_UNSIGNED);
451	nfsm_chain_add_fh(error, &nmreq, NFS_VER3, np->n_fhp, np->n_fhsize);
452	nfsm_chain_add_32(error, &nmreq, *mode);
453	nfsm_chain_build_done(error, &nmreq);
454	nfsmout_if(error);
455	error = nfs_request(np, NULL, &nmreq, NFSPROC_ACCESS, ctx,
456			&nmrep, &xid, &status);
457	nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
458	if (!error)
459		error = status;
460	nfsm_chain_get_32(error, &nmrep, access);
461	nfsmout_if(error);
462
463	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
464	slot = nfs_node_mode_slot(np, uid, 1);
465	np->n_modeuid[slot] = uid;
466	microuptime(&now);
467	np->n_modestamp[slot] = now.tv_sec;
468	np->n_mode[slot] = access;
469
470	/*
471	 * If we asked for DELETE but didn't get it, the server
472	 * may simply not support returning that bit (possible
473	 * on UNIX systems).  So, we'll assume that it is OK,
474	 * and just let any subsequent delete action fail if it
475	 * really isn't deletable.
476	 */
477	if ((*mode & NFS_ACCESS_DELETE) &&
478	    !(np->n_mode[slot] & NFS_ACCESS_DELETE))
479		np->n_mode[slot] |= NFS_ACCESS_DELETE;
480	/* pass back the mode returned with this request */
481	*mode = np->n_mode[slot];
482nfsmout:
483	nfsm_chain_cleanup(&nmreq);
484	nfsm_chain_cleanup(&nmrep);
485	return (error);
486}
487
488/*
489 * NFS access vnode op.
490 * For NFS version 2, just return ok. File accesses may fail later.
491 * For NFS version 3+, use the access RPC to check accessibility. If file modes
492 * are changed on the server, accesses might still fail later.
493 */
494int
495nfs_vnop_access(
496	struct vnop_access_args /* {
497		struct vnodeop_desc *a_desc;
498		vnode_t a_vp;
499		int a_action;
500		vfs_context_t a_context;
501	} */ *ap)
502{
503	vfs_context_t ctx = ap->a_context;
504	vnode_t vp = ap->a_vp;
505	int error = 0, slot, dorpc;
506	u_long mode, wmode;
507	nfsnode_t np = VTONFS(vp);
508	struct nfsmount *nmp;
509	int nfsvers;
510	struct timeval now;
511	uid_t uid;
512
513	nmp = VTONMP(vp);
514	if (!nmp)
515		return (ENXIO);
516	nfsvers = nmp->nm_vers;
517
518	if (nfsvers == NFS_VER2) {
519		if ((ap->a_action & KAUTH_VNODE_WRITE_RIGHTS) &&
520		    vfs_isrdonly(vnode_mount(vp)))
521			return (EROFS);
522		return (0);
523	}
524
525	/*
526	 * For NFS v3, do an access rpc, otherwise you are stuck emulating
527	 * ufs_access() locally using the vattr. This may not be correct,
528	 * since the server may apply other access criteria such as
529	 * client uid-->server uid mapping that we do not know about, but
530	 * this is better than just returning anything that is lying about
531	 * in the cache.
532	 */
533
534	/*
535	 * Convert KAUTH primitives to NFS access rights.
536	 */
537	mode = 0;
538	if (vnode_isdir(vp)) {
539		/* directory */
540		if (ap->a_action &
541		    (KAUTH_VNODE_LIST_DIRECTORY |
542		    KAUTH_VNODE_READ_EXTATTRIBUTES))
543			mode |= NFS_ACCESS_READ;
544		if (ap->a_action & KAUTH_VNODE_SEARCH)
545			mode |= NFS_ACCESS_LOOKUP;
546		if (ap->a_action &
547		    (KAUTH_VNODE_ADD_FILE |
548		    KAUTH_VNODE_ADD_SUBDIRECTORY))
549			mode |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
550		if (ap->a_action & KAUTH_VNODE_DELETE_CHILD)
551			mode |= NFS_ACCESS_MODIFY;
552	} else {
553		/* file */
554		if (ap->a_action &
555		    (KAUTH_VNODE_READ_DATA |
556		    KAUTH_VNODE_READ_EXTATTRIBUTES))
557			mode |= NFS_ACCESS_READ;
558		if (ap->a_action & KAUTH_VNODE_WRITE_DATA)
559			mode |= NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
560		if (ap->a_action & KAUTH_VNODE_APPEND_DATA)
561			mode |= NFS_ACCESS_EXTEND;
562		if (ap->a_action & KAUTH_VNODE_EXECUTE)
563			mode |= NFS_ACCESS_EXECUTE;
564	}
565	/* common */
566	if (ap->a_action & KAUTH_VNODE_DELETE)
567		mode |= NFS_ACCESS_DELETE;
568	if (ap->a_action &
569	    (KAUTH_VNODE_WRITE_ATTRIBUTES |
570	    KAUTH_VNODE_WRITE_EXTATTRIBUTES |
571	    KAUTH_VNODE_WRITE_SECURITY))
572		mode |= NFS_ACCESS_MODIFY;
573	/* XXX this is pretty dubious */
574	if (ap->a_action & KAUTH_VNODE_CHANGE_OWNER)
575		mode |= NFS_ACCESS_MODIFY;
576
577	/* if caching, always ask for every right */
578	if (nfs_access_cache_timeout > 0) {
579		wmode = NFS_ACCESS_READ | NFS_ACCESS_MODIFY |
580			NFS_ACCESS_EXTEND | NFS_ACCESS_EXECUTE |
581			NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
582	} else {
583		wmode = mode;
584	}
585
586	if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
587		return (error);
588
589	/*
590	 * Does our cached result allow us to give a definite yes to
591	 * this request?
592	 */
593	uid = kauth_cred_getuid(vfs_context_ucred(ctx));
594	slot = nfs_node_mode_slot(np, uid, 0);
595	dorpc = 1;
596	if (NMODEVALID(np, slot)) {
597		microuptime(&now);
598		if ((now.tv_sec < (np->n_modestamp[slot] + nfs_access_cache_timeout)) &&
599		    ((np->n_mode[slot] & mode) == mode)) {
600			/* OSAddAtomic(1, (SInt32*)&nfsstats.accesscache_hits); */
601			dorpc = 0;
602			wmode = np->n_mode[slot];
603		}
604	}
605	if (dorpc) {
606		/* Either a no, or a don't know.  Go to the wire. */
607		/* OSAddAtomic(1, (SInt32*)&nfsstats.accesscache_misses); */
608		error = nmp->nm_funcs->nf_access_rpc(np, &wmode, ctx);
609	}
610	if (!error && ((wmode & mode) != mode))
611		error = EACCES;
612	nfs_unlock(np);
613
614	return (error);
615}
616
617/*
618 * NFS open vnode op
619 */
620int
621nfs3_vnop_open(
622	struct vnop_open_args /* {
623		struct vnodeop_desc *a_desc;
624		vnode_t a_vp;
625		int a_mode;
626		vfs_context_t a_context;
627	} */ *ap)
628{
629	vfs_context_t ctx = ap->a_context;
630	vnode_t vp = ap->a_vp;
631	nfsnode_t np = VTONFS(vp);
632	struct nfsmount *nmp;
633	struct nfs_vattr nvattr;
634	enum vtype vtype;
635	int error, nfsvers;
636
637	nmp = VTONMP(vp);
638	if (!nmp)
639		return (ENXIO);
640	nfsvers = nmp->nm_vers;
641
642	vtype = vnode_vtype(vp);
643	if ((vtype != VREG) && (vtype != VDIR) && (vtype != VLNK))
644		return (EACCES);
645	if (ISSET(np->n_flag, NUPDATESIZE))
646		nfs_data_update_size(np, 0);
647	if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
648		return (error);
649	if (np->n_flag & NNEEDINVALIDATE) {
650		np->n_flag &= ~NNEEDINVALIDATE;
651		nfs_unlock(np);
652		nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
653		if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
654			return (error);
655	}
656	if (np->n_flag & NMODIFIED) {
657		nfs_unlock(np);
658		if ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1)) == EINTR)
659			return (error);
660		if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
661			return (error);
662		if (vtype == VDIR)
663			np->n_direofoffset = 0;
664		NATTRINVALIDATE(np); /* For Open/Close consistency */
665		error = nfs_getattr(np, &nvattr, ctx, 1);
666		if (error) {
667			nfs_unlock(np);
668			return (error);
669		}
670		if (vtype == VDIR) {
671			/* if directory changed, purge any name cache entries */
672			if (NFS_CHANGED_NC(nfsvers, np, &nvattr)) {
673				np->n_flag &= ~NNEGNCENTRIES;
674				cache_purge(vp);
675			}
676			NFS_CHANGED_UPDATE_NC(nfsvers, np, &nvattr);
677		}
678		NFS_CHANGED_UPDATE(nfsvers, np, &nvattr);
679	} else {
680		NATTRINVALIDATE(np); /* For Open/Close consistency */
681		error = nfs_getattr(np, &nvattr, ctx, 1);
682		if (error) {
683			nfs_unlock(np);
684			return (error);
685		}
686		if (NFS_CHANGED(nfsvers, np, &nvattr)) {
687			if (vtype == VDIR) {
688				np->n_direofoffset = 0;
689				nfs_invaldir(np);
690				/* purge name cache entries */
691				if (NFS_CHANGED_NC(nfsvers, np, &nvattr)) {
692					np->n_flag &= ~NNEGNCENTRIES;
693					cache_purge(vp);
694				}
695			}
696			nfs_unlock(np);
697			if ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1)) == EINTR)
698				return (error);
699			if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
700				return (error);
701			if (vtype == VDIR)
702				NFS_CHANGED_UPDATE_NC(nfsvers, np, &nvattr);
703			NFS_CHANGED_UPDATE(nfsvers, np, &nvattr);
704		}
705	}
706	nfs_unlock(np);
707	return (0);
708}
709
710/*
711 * NFS close vnode op
712 * What an NFS client should do upon close after writing is a debatable issue.
713 * Most NFS clients push delayed writes to the server upon close, basically for
714 * two reasons:
715 * 1 - So that any write errors may be reported back to the client process
716 *     doing the close system call. By far the two most likely errors are
717 *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
718 * 2 - To put a worst case upper bound on cache inconsistency between
719 *     multiple clients for the file.
720 * There is also a consistency problem for Version 2 of the protocol w.r.t.
721 * not being able to tell if other clients are writing a file concurrently,
722 * since there is no way of knowing if the changed modify time in the reply
723 * is only due to the write for this client.
724 * (NFS Version 3 provides weak cache consistency data in the reply that
725 *  should be sufficient to detect and handle this case.)
726 *
727 * The current code does the following:
728 * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
729 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
730 *                     them.
731 */
732int
733nfs3_vnop_close(
734	struct vnop_close_args /* {
735		struct vnodeop_desc *a_desc;
736		vnode_t a_vp;
737		int a_fflag;
738		vfs_context_t a_context;
739	} */ *ap)
740{
741	vfs_context_t ctx = ap->a_context;
742	vnode_t vp = ap->a_vp;
743	nfsnode_t np = VTONFS(vp);
744	struct nfsmount *nmp;
745	int nfsvers;
746	int error = 0;
747
748	if (vnode_vtype(vp) != VREG)
749		return (0);
750	nmp = VTONMP(vp);
751	if (!nmp)
752		return (ENXIO);
753	nfsvers = nmp->nm_vers;
754
755	if (ISSET(np->n_flag, NUPDATESIZE))
756		nfs_data_update_size(np, 0);
757	if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
758		return (error);
759	if (np->n_flag & NNEEDINVALIDATE) {
760		np->n_flag &= ~NNEEDINVALIDATE;
761		nfs_unlock(np);
762		nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
763		if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
764			return (error);
765	}
766	if (np->n_flag & NMODIFIED) {
767		nfs_unlock(np);
768		if (nfsvers != NFS_VER2)
769			error = nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), 0);
770		else
771			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
772		if (error)
773			return (error);
774		nfs_lock(np, NFS_NODE_LOCK_FORCE);
775		NATTRINVALIDATE(np);
776	}
777	if (np->n_flag & NWRITEERR) {
778		np->n_flag &= ~NWRITEERR;
779		error = np->n_error;
780	}
781	nfs_unlock(np);
782	return (error);
783}
784
785
786int
787nfs3_getattr_rpc(
788	nfsnode_t np,
789	mount_t mp,
790	u_char *fhp,
791	size_t fhsize,
792	vfs_context_t ctx,
793	struct nfs_vattr *nvap,
794	u_int64_t *xidp)
795{
796	struct nfsmount *nmp = mp ? VFSTONFS(mp) : NFSTONMP(np);
797	int error = 0, status, nfsvers;
798	struct nfsm_chain nmreq, nmrep;
799
800	if (!nmp)
801		return (ENXIO);
802	nfsvers = nmp->nm_vers;
803
804	nfsm_chain_null(&nmreq);
805	nfsm_chain_null(&nmrep);
806
807	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
808	if (nfsvers != NFS_VER2)
809		nfsm_chain_add_32(error, &nmreq, fhsize);
810	nfsm_chain_add_opaque(error, &nmreq, fhp, fhsize);
811	nfsm_chain_build_done(error, &nmreq);
812	nfsmout_if(error);
813	error = nfs_request(np, mp, &nmreq, NFSPROC_GETATTR, ctx,
814			&nmrep, xidp, &status);
815	if (!error)
816		error = status;
817	nfsmout_if(error);
818	error = nfs_parsefattr(&nmrep, nfsvers, nvap);
819nfsmout:
820	nfsm_chain_cleanup(&nmreq);
821	nfsm_chain_cleanup(&nmrep);
822	return (error);
823}
824
825
/*
 * Get the attributes of an NFS node into *nvap.
 *
 * The attribute cache is consulted first (nfs_getattrcache); only when it
 * reports ENOENT is the server asked, through the mount's nf_access_rpc
 * and/or nf_getattr_rpc functions.  When the freshly loaded attributes
 * indicate the node changed (NFS_CHANGED), directory state and name cache
 * entries are purged and cached buffers are invalidated.
 *
 * np            - node whose attributes are wanted
 * nvap          - receives the attributes
 * ctx           - context handed to the RPCs and to nfs_vinvalbuf()
 * alreadylocked - non-zero when the caller already holds the node lock.
 *                 In that case this function neither takes nor drops the
 *                 lock, and instead of invalidating buffers itself it sets
 *                 NNEEDINVALIDATE so the invalidation happens later.
 *
 * Returns 0 on success or an errno value (ENXIO if the mount is gone).
 */
int
nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int alreadylocked)
{
	struct nfsmount *nmp;
	/*
	 * lockerror tracks whether the node lock was taken by this function:
	 * 0 means "we hold it and must unlock at nfsmout", anything else
	 * (initially ENOENT) means there is nothing for us to unlock.
	 */
	int error = 0, lockerror = ENOENT, nfsvers, avoidfloods;
	u_int64_t xid;

	FSDBG_TOP(513, np->n_size, np, np->n_vattr.nva_size, np->n_flag);

	/* Update local times for special files. */
	if (np->n_flag & (NACC | NUPD)) {
		if (!alreadylocked)
			nfs_lock(np, NFS_NODE_LOCK_FORCE);
		np->n_flag |= NCHG;
		if (!alreadylocked)
			nfs_unlock(np);
	}
	/* Update size, if necessary (skipped when the caller holds the node lock) */
	if (!alreadylocked && ISSET(np->n_flag, NUPDATESIZE))
		nfs_data_update_size(np, 0);

	/*
	 * First look in the cache.
	 * 0 is a hit and any error other than ENOENT is final; only ENOENT
	 * falls through to the RPC path below.
	 */
	if ((error = nfs_getattrcache(np, nvap, alreadylocked)) == 0)
		goto nfsmout;
	if (error != ENOENT)
		goto nfsmout;

	nmp = NFSTONMP(np);
	if (!nmp) {
		error = ENXIO;
		goto nfsmout;
	}
	nfsvers = nmp->nm_vers;

	/*
	 * Try to get both the attributes and access info by making an
	 * ACCESS call and seeing if it returns updated attributes.
	 * But don't bother if we aren't caching access info or if the
	 * attributes returned wouldn't be cached.
	 */
	if ((nfsvers != NFS_VER2) && (nfs_access_cache_timeout > 0)) {
		/* take the node lock ourselves unless the caller already has it */
		if (!alreadylocked && ((error = lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))))
			goto nfsmout;
		if (nfs_attrcachetimeout(np) > 0) {
			/*  OSAddAtomic(1, (SInt32*)&nfsstats.accesscache_misses); */
			u_long mode = NFS_ACCESS_ALL;
			error = nmp->nm_funcs->nf_access_rpc(np, &mode, ctx);
			if (error)
				goto nfsmout;
			/* re-check the cache (node is locked at this point, hence the 1) */
			if ((error = nfs_getattrcache(np, nvap, 1)) == 0)
				goto nfsmout;
			if (error != ENOENT)
				goto nfsmout;
			/* still not cached: fall through to an explicit GETATTR */
			error = 0;
		}
	} else if (!alreadylocked) {
		error = lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
		nfsmout_if(error);
	}
	avoidfloods = 0;
tryagain:
	error = nmp->nm_funcs->nf_getattr_rpc(np, NULL, np->n_fhp, np->n_fhsize, ctx, nvap, &xid);
	nfsmout_if(error);
	error = nfs_loadattrcache(np, nvap, &xid, 0);
	nfsmout_if(error);
	/* a zero xid after nfs_loadattrcache() means the reply was not used; retry */
	if (!xid) { /* out-of-order rpc - attributes were dropped */
		FSDBG(513, -1, np, np->n_xid >> 32, np->n_xid);
		if (avoidfloods++ < 100)
			goto tryagain;
		/* avoidfloods>1 is bizarre.  at 100 pull the plug */
		panic("nfs_getattr: getattr flood\n");
	}
	if (NFS_CHANGED(nfsvers, np, nvap)) {
		vnode_t vp = NFSTOV(np);
		enum vtype vtype = vnode_vtype(vp);
		FSDBG(513, -1, np, -1, np);
		if (vtype == VDIR) {
			nfs_invaldir(np);
			/* purge name cache entries */
			if (NFS_CHANGED_NC(nfsvers, np, nvap)) {
				np->n_flag &= ~NNEGNCENTRIES;
				cache_purge(vp);
			}
		}
		if (!alreadylocked) {
			/*
			 * Drop our lock around the buffer invalidation and
			 * retake it afterwards; lockerror is kept in step so
			 * nfsmout only unlocks when we really hold the lock.
			 */
			nfs_unlock(np);
			lockerror = ENOENT;
			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
			FSDBG(513, -1, np, -2, error);
			if (!error)
				error = lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
			if (!error) {
				/* record the new state only once invalidation succeeded */
				if (vtype == VDIR)
					NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap);
				NFS_CHANGED_UPDATE(nfsvers, np, nvap);
			}
		} else {
			/* invalidate later */
			np->n_flag |= NNEEDINVALIDATE;
		}
	}
nfsmout:
	/* unlock only if the lock was acquired in this function */
	if (!lockerror)
		nfs_unlock(np);
	FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag);
	return (error);
}
935
936/*
937 * NFS getattr call from vfs.
938 */
939static int
940nfs3_vnop_getattr(
941	struct vnop_getattr_args /* {
942		struct vnodeop_desc *a_desc;
943		vnode_t a_vp;
944		struct vnode_attr *a_vap;
945		vfs_context_t a_context;
946	} */ *ap)
947{
948	int error;
949	struct nfs_vattr nva;
950	struct vnode_attr *vap = ap->a_vap;
951	dev_t rdev;
952
953	error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, 0);
954	if (error)
955		return (error);
956
957	/* copy nva to *a_vap */
958	VATTR_RETURN(vap, va_type, nva.nva_type);
959	VATTR_RETURN(vap, va_mode, nva.nva_mode);
960	rdev = makedev(nva.nva_rawdev.specdata1, nva.nva_rawdev.specdata2);
961	VATTR_RETURN(vap, va_rdev, rdev);
962	VATTR_RETURN(vap, va_uid, nva.nva_uid);
963	VATTR_RETURN(vap, va_gid, nva.nva_gid);
964	VATTR_RETURN(vap, va_nlink, nva.nva_nlink);
965	VATTR_RETURN(vap, va_fileid, nva.nva_fileid);
966	VATTR_RETURN(vap, va_data_size, nva.nva_size);
967	VATTR_RETURN(vap, va_data_alloc, nva.nva_bytes);
968	VATTR_RETURN(vap, va_iosize, nfs_iosize);
969	vap->va_access_time.tv_sec = nva.nva_timesec[NFSTIME_ACCESS];
970	vap->va_access_time.tv_nsec = nva.nva_timensec[NFSTIME_ACCESS];
971	VATTR_SET_SUPPORTED(vap, va_access_time);
972	vap->va_modify_time.tv_sec = nva.nva_timesec[NFSTIME_MODIFY];
973	vap->va_modify_time.tv_nsec = nva.nva_timensec[NFSTIME_MODIFY];
974	VATTR_SET_SUPPORTED(vap, va_modify_time);
975	vap->va_change_time.tv_sec = nva.nva_timesec[NFSTIME_CHANGE];
976	vap->va_change_time.tv_nsec = nva.nva_timensec[NFSTIME_CHANGE];
977	VATTR_SET_SUPPORTED(vap, va_change_time);
978
979	// VATTR_RETURN(vap, va_encoding, 0xffff /* kTextEncodingUnknown */);
980	return (error);
981}
982
/*
 * NFS setattr call.
 *
 * Prepares the local node for the attribute change described by a_vap,
 * issues the mount's nf_setattr_rpc, and cleans up afterwards.
 *
 * - Fails with ENXIO if the mount is gone and EROFS if it is read-only.
 * - Size change (va_data_size active):
 *     directories return EISDIR; for VCHR/VBLK/VSOCK/VFIFO the size is
 *     dropped from the request (returning 0 if nothing else among
 *     mtime/atime/mode/uid/gid was requested); for everything else the
 *     buffers are flushed/invalidated, the node's data lock is taken, a
 *     shrink is applied to the cached buffers, and n_size/UBC size are
 *     updated before the RPC.  The data lock is held across the RPC and
 *     released before returning.
 * - Time change without size change: dirty data of a modified regular
 *     file is pushed first.
 * - Mode/uid/gid change: cached access info is invalidated
 *     (NMODEINVALIDATE) and an nfs_dulookup is run on the parent directory
 *     around the RPC.
 * - If the RPC fails after the local size was changed, the old size is
 *     restored locally and a second RPC is sent to resync with the server.
 *
 * Returns 0 or an errno value; the result of the (first) setattr RPC is
 * what is returned once the RPC has been attempted.
 */
static int
nfs_vnop_setattr(
	struct vnop_setattr_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		struct vnode_attr *a_vap;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	struct vnode_attr *vap = ap->a_vap;
	int error = 0;
	int biosize, nfsvers;
	u_quad_t origsize;	/* size to fall back to if the RPC fails */
	struct nfs_dulookup dul;
	nfsnode_t dnp = NULL;	/* parent node, non-NULL only while we hold its lock */
	vnode_t dvp = NULL;
	const char *vname = NULL;

	nmp = VTONMP(vp);
	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	biosize = nmp->nm_biosize;

	/* Disallow write attempts if the filesystem is mounted read-only. */
	if (vnode_vfsisrdonly(vp))
		return (EROFS);

	origsize = np->n_size;
	if (VATTR_IS_ACTIVE(vap, va_data_size)) {
		switch (vnode_vtype(vp)) {
		case VDIR:
			return (EISDIR);
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			/* size is meaningless here: nothing to do unless another attribute is set */
			if (!VATTR_IS_ACTIVE(vap, va_modify_time) &&
			    !VATTR_IS_ACTIVE(vap, va_access_time) &&
			    !VATTR_IS_ACTIVE(vap, va_mode) &&
			    !VATTR_IS_ACTIVE(vap, va_uid) &&
			    !VATTR_IS_ACTIVE(vap, va_gid)) {
				return (0);
			}
			/* drop the size from the request and carry on with the rest */
			VATTR_CLEAR_ACTIVE(vap, va_data_size);
			break;
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 * (This repeats the check already made above.)
			 */
			if (vnode_vfsisrdonly(vp))
				return (EROFS);
			FSDBG_TOP(512, np->n_size, vap->va_data_size,
				  np->n_vattr.nva_size, np->n_flag);
			/* clear NNEEDINVALIDATE, if set */
			if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
				return (error);
			if (np->n_flag & NNEEDINVALIDATE)
				np->n_flag &= ~NNEEDINVALIDATE;
			nfs_unlock(np);
			/* flush everything (dirty data is only saved when the new size is non-zero) */
			error = nfs_vinvalbuf(vp, (vap->va_data_size ? V_SAVE : 0) , ctx, 1);
			if (error) {
				printf("nfs_setattr: nfs_vinvalbuf %d\n", error);
				FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, -1);
				return (error);
			}
			/* from here until the end of the function the node's data lock is held */
			nfs_data_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
			if (np->n_size > vap->va_data_size) { /* shrinking? */
				daddr64_t obn, bn;
				int neweofoff, mustwrite;
				struct nfsbuf *bp;

				/*
				 * Walk the buffers from the block holding the old
				 * EOF (obn) down to the block holding the new EOF
				 * (bn).
				 */
				obn = (np->n_size - 1) / biosize;
				bn = vap->va_data_size / biosize;
				for ( ; obn >= bn; obn--) {
					if (!nfs_buf_is_incore(np, obn))
						continue;
					error = nfs_buf_get(np, obn, biosize, NULL, NBLK_READ, &bp);
					if (error)
						continue;
					/* blocks entirely past the new EOF are simply invalidated */
					if (obn != bn) {
						FSDBG(512, bp, bp->nb_flags, 0, obn);
						SET(bp->nb_flags, NB_INVAL);
						nfs_buf_release(bp, 1);
						continue;
					}
					/* this is the block containing the new EOF */
					mustwrite = 0;
					neweofoff = vap->va_data_size - NBOFF(bp);
					/* check for any dirty data before the new EOF */
					if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
						/* clip dirty range to EOF */
						if (bp->nb_dirtyend > neweofoff) {
							bp->nb_dirtyend = neweofoff;
							if (bp->nb_dirtyoff >= bp->nb_dirtyend)
								bp->nb_dirtyoff = bp->nb_dirtyend = 0;
						}
						if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff))
							mustwrite++;
					}
					/*
					 * Keep only the nb_dirty bits below the page-rounded
					 * new EOF (nb_dirty appears to be a per-page bitmask
					 * -- TODO confirm against struct nfsbuf).
					 */
					bp->nb_dirty &= (1 << round_page_32(neweofoff)/PAGE_SIZE) - 1;
					if (bp->nb_dirty)
						mustwrite++;
					if (!mustwrite) {
						FSDBG(512, bp, bp->nb_flags, 0, obn);
						SET(bp->nb_flags, NB_INVAL);
						nfs_buf_release(bp, 1);
						continue;
					}
					/* gotta write out dirty data before invalidating */
					/* (NB_STABLE indicates that data writes should be FILESYNC) */
					/* (NB_NOCACHE indicates buffer should be discarded) */
					CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC | NB_READ));
					SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
					/* give the buffer a write credential if it has none */
					if (!IS_VALID_CRED(bp->nb_wcred)) {
						kauth_cred_t cred = vfs_context_ucred(ctx);
						kauth_cred_ref(cred);
						bp->nb_wcred = cred;
					}
					error = nfs_buf_write(bp);
					// Note: bp has been released
					if (error) {
						/* bp is only logged by value here, not dereferenced */
						FSDBG(512, bp, 0xd00dee, 0xbad, error);
						nfs_lock(np, NFS_NODE_LOCK_FORCE);
						np->n_error = error;
						np->n_flag |= NWRITEERR;
						/*
						 * There was a write error and we need to
						 * invalidate attrs and flush buffers in
						 * order to sync up with the server.
						 * (if this write was extending the file,
						 * we may no longer know the correct size)
						 */
						NATTRINVALIDATE(np);
						nfs_unlock(np);
						/* the data lock is dropped around the invalidation */
						nfs_data_unlock(np);
						nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
						nfs_data_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
						/* the error stays recorded on the node; the setattr itself continues */
						error = 0;
					}
				}
			}
			/* switch the local idea of the file size to the new value before the RPC */
			if (vap->va_data_size != np->n_size)
				ubc_setsize(vp, (off_t)vap->va_data_size); /* XXX error? */
			origsize = np->n_size;
			np->n_size = np->n_vattr.nva_size = vap->va_data_size;
			CLR(np->n_flag, NUPDATESIZE);
			FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
		}
	} else if (VATTR_IS_ACTIVE(vap, va_modify_time) ||
		    VATTR_IS_ACTIVE(vap, va_access_time) ||
		    (vap->va_vaflags & VA_UTIMES_NULL)) {
		/* time change only: push dirty data of a modified regular file first */
		if ((error = nfs_lock(np, NFS_NODE_LOCK_SHARED)))
			return (error);
		if ((np->n_flag & NMODIFIED) && (vnode_vtype(vp) == VREG)) {
			nfs_unlock(np);
			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
			/* only an interrupted flush aborts the setattr */
			if (error == EINTR)
				return (error);
		} else {
			nfs_unlock(np);
		}
	}
	if (VATTR_IS_ACTIVE(vap, va_mode) ||
	    VATTR_IS_ACTIVE(vap, va_uid) ||
	    VATTR_IS_ACTIVE(vap, va_gid)) {
		if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) {
			/* the data lock is held only if the size path above took it */
			if (VATTR_IS_ACTIVE(vap, va_data_size))
				nfs_data_unlock(np);
			return (error);
		}
		NMODEINVALIDATE(np);
		nfs_unlock(np);
		/*
		 * Start an nfs_dulookup on the parent directory for this name.
		 * NOTE(review): what the dulookup does is not visible in this
		 * part of the file; it is started here and finished after the RPC.
		 */
		dvp = vnode_getparent(vp);
		vname = vnode_getname(vp);
		dnp = (dvp && vname) ? VTONFS(dvp) : NULL;
		if (dnp) {
			error = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE);
			if (error) {
				/* failing to lock the parent just skips the dulookup */
				dnp = NULL;
				error = 0;
			}
		}
		if (dnp) {
			nfs_dulookup_init(&dul, dnp, vname, strlen(vname));
			nfs_dulookup_start(&dul, dnp, ctx);
		}
	}

	error = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx, 0);

	/* undo the parent-directory setup done before the RPC */
	if (VATTR_IS_ACTIVE(vap, va_mode) ||
	    VATTR_IS_ACTIVE(vap, va_uid) ||
	    VATTR_IS_ACTIVE(vap, va_gid)) {
		if (dnp) {
			nfs_dulookup_finish(&dul, dnp, ctx);
			nfs_unlock(dnp);
		}
		if (dvp != NULLVP)
			vnode_put(dvp);
		if (vname != NULL)
			vnode_putname(vname);
	}

	FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, error);
	if (VATTR_IS_ACTIVE(vap, va_data_size)) {
		if (error && (origsize != np->n_size)) {
			/* make every effort to resync file size w/ server... */
			int err; /* preserve "error" for return */
			np->n_size = np->n_vattr.nva_size = origsize;
			CLR(np->n_flag, NUPDATESIZE);
			FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002);
			ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
			vap->va_data_size = origsize;
			err = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx, 0);
			if (err)
				printf("nfs_vnop_setattr: nfs%d_setattr_rpc %d %d\n", nfsvers, error, err);
		}
		/* release the data lock taken in the size-change path */
		nfs_data_unlock(np);
	}
	return (error);
}
1213
/*
 * Do an NFS setattr RPC.
 *
 * Encodes the active attributes of *vap into a SETATTR request (using the
 * v3 encoding when the mount is NFS_VER3 and the v2 encoding otherwise),
 * sends it, and processes the attribute data in the reply.
 *
 * np            - node whose attributes are being set
 * vap           - attributes to set; mode, uid, gid, data size, access time
 *                 and modify time are marked supported.  A request to set
 *                 non-zero va_flags is rejected (ENOTSUP if flags are the
 *                 only active attribute, EINVAL otherwise); zero flags are
 *                 ignored.
 * ctx           - context passed to nfs_request()
 * alreadylocked - non-zero if the caller holds the node lock; otherwise the
 *                 lock is taken after the request returns and released
 *                 before this function returns.
 *
 * Returns 0 or an errno value (ENXIO if the mount is gone).
 */
int
nfs3_setattr_rpc(
	nfsnode_t np,
	struct vnode_attr *vap,
	vfs_context_t ctx,
	int alreadylocked)
{
	struct nfsmount *nmp = NFSTONMP(np);
	/* lockerror == 0 means the node lock was taken here (see nfsmout) */
	int error = 0, lockerror = ENOENT, status, wccpostattr = 0, nfsvers;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;

	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;

	VATTR_SET_SUPPORTED(vap, va_mode);
	VATTR_SET_SUPPORTED(vap, va_uid);
	VATTR_SET_SUPPORTED(vap, va_gid);
	VATTR_SET_SUPPORTED(vap, va_data_size);
	VATTR_SET_SUPPORTED(vap, va_access_time);
	VATTR_SET_SUPPORTED(vap, va_modify_time);

	if (VATTR_IS_ACTIVE(vap, va_flags)) {
		if (vap->va_flags) {	/* we don't support setting flags */
			if (vap->va_active & ~VNODE_ATTR_va_flags)
				return (EINVAL);	/* return EINVAL if other attributes also set */
			else
				return (ENOTSUP);	/* return ENOTSUP for chflags(2) */
		}
		/* no flags set, so we'll just ignore it */
		if (!(vap->va_active & ~VNODE_ATTR_va_flags))
			return (0); /* no (other) attributes to set, so nothing to do */
	}

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	/* build the request: file handle followed by the attributes to set */
	nfsm_chain_build_alloc_init(error, &nmreq,
		NFSX_FH(nfsvers) + NFSX_SATTR(nfsvers));
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	if (nfsvers == NFS_VER3) {
		/* v3: each attribute is a TRUE/FALSE "present" word, followed by the value if TRUE */
		if (VATTR_IS_ACTIVE(vap, va_mode)) {
			nfsm_chain_add_32(error, &nmreq, TRUE);
			nfsm_chain_add_32(error, &nmreq, vap->va_mode);
		} else {
			nfsm_chain_add_32(error, &nmreq, FALSE);
		}
		if (VATTR_IS_ACTIVE(vap, va_uid)) {
			nfsm_chain_add_32(error, &nmreq, TRUE);
			nfsm_chain_add_32(error, &nmreq, vap->va_uid);
		} else {
			nfsm_chain_add_32(error, &nmreq, FALSE);
		}
		if (VATTR_IS_ACTIVE(vap, va_gid)) {
			nfsm_chain_add_32(error, &nmreq, TRUE);
			nfsm_chain_add_32(error, &nmreq, vap->va_gid);
		} else {
			nfsm_chain_add_32(error, &nmreq, FALSE);
		}
		if (VATTR_IS_ACTIVE(vap, va_data_size)) {
			nfsm_chain_add_32(error, &nmreq, TRUE);
			nfsm_chain_add_64(error, &nmreq, vap->va_data_size);
		} else {
			nfsm_chain_add_32(error, &nmreq, FALSE);
		}
		/* access time then modify time, each with a "how to set" selector */
		if (vap->va_vaflags & VA_UTIMES_NULL) {
			/* both times are to be set by the server */
			nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
			nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
		} else {
			if (VATTR_IS_ACTIVE(vap, va_access_time)) {
				nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
				nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
				nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_nsec);
			} else {
				nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
			}
			if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
				nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
				nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
				nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_nsec);
			} else {
				nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
			}
		}
		/* trailing FALSE word (presumably the "guard check" flag of SETATTR3args -- confirm against RFC 1813) */
		nfsm_chain_add_32(error, &nmreq, FALSE);
	} else {
		/* v2: fixed layout, with all-ones (-1) standing for "don't change" */
		nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_mode) ?
			vtonfsv2_mode(vnode_vtype(NFSTOV(np)), vap->va_mode) : -1);
		nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_uid) ?
			vap->va_uid : (uint32_t)-1);
		nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_gid) ?
			vap->va_gid : (uint32_t)-1);
		nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_data_size) ?
			vap->va_data_size : (uint32_t)-1);
		/* times go out as seconds + microseconds (tv_nsec / 1000) */
		if (VATTR_IS_ACTIVE(vap, va_access_time)) {
			nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
			nfsm_chain_add_32(error, &nmreq, (vap->va_access_time.tv_nsec != -1) ?
				((uint32_t)vap->va_access_time.tv_nsec / 1000) : 0xffffffff);
		} else {
			nfsm_chain_add_32(error, &nmreq, -1);
			nfsm_chain_add_32(error, &nmreq, -1);
		}
		if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
			nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
			nfsm_chain_add_32(error, &nmreq, (vap->va_modify_time.tv_nsec != -1) ?
				((uint32_t)vap->va_modify_time.tv_nsec / 1000) : 0xffffffff);
		} else {
			nfsm_chain_add_32(error, &nmreq, -1);
			nfsm_chain_add_32(error, &nmreq, -1);
		}
	}
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfs_request(np, NULL, &nmreq, NFSPROC_SETATTR, ctx,
			&nmrep, &xid, &status);
	/* the node must be locked while the reply's attributes are applied to it */
	if (!alreadylocked && ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))))
		error = lockerror;
	if (nfsvers == NFS_VER3) {
		struct timespec premtime = { 0, 0 };
		nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
		nfsmout_if(error);
		/* if file hadn't changed, update cached mtime */
		if (nfstimespeccmp(&np->n_mtime, &premtime, ==))
			NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
		/* if directory hadn't changed, update namecache mtime */
		if ((vnode_vtype(NFSTOV(np)) == VDIR) &&
		    nfstimespeccmp(&np->n_ncmtime, &premtime, ==))
			NFS_CHANGED_UPDATE_NC(nfsvers, np, &np->n_vattr);
		/* no post-op attributes in the reply: cached attributes can't be trusted */
		if (!wccpostattr)
			NATTRINVALIDATE(np);
		/* error is 0 here (see nfsmout_if above), so the RPC status is the result */
		error = status;
	} else {
		if (!error)
			error = status;
		nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
	}
nfsmout:
	/* drop the node lock only if it was taken in this function */
	if (!alreadylocked && !lockerror)
		nfs_unlock(np);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
1361
/*
 * NFS lookup call, one step at a time...
 *
 * Looks up the name in a_cnp within directory a_dvp and returns the
 * resulting vnode in *a_vpp.
 *
 * - The directory's attributes are fetched first; if they show the
 *   directory changed (NFS_CHANGED_NC) its name cache entries are purged.
 * - The name cache is then consulted.  A negative entry returns ENOENT; a
 *   hit returns the cached vnode, subject to a search-access check on the
 *   directory.
 * - On a cache miss, "." and ".." are resolved locally; any other name is
 *   looked up with the mount's lookup RPC and turned into a node with
 *   nfs_nget().
 * - When the lookup ends in ENOENT, a negative name cache entry may be
 *   added, and for the last component of a CREATE/RENAME the error is
 *   turned into EJUSTRETURN (or EROFS on a read-only mount).
 *
 * Returns 0 with *a_vpp set on success; on error *a_vpp is NULLVP.
 */
static int
nfs_vnop_lookup(
	struct vnop_lookup_args /* {
		struct vnodeop_desc *a_desc;
		vnode_t a_dvp;
		vnode_t *a_vpp;
		struct componentname *a_cnp;
		vfs_context_t a_context;
	} */ *ap)
{
	vfs_context_t ctx = ap->a_context;
	struct componentname *cnp = ap->a_cnp;
	vnode_t dvp = ap->a_dvp;
	vnode_t *vpp = ap->a_vpp;
	int flags = cnp->cn_flags;	/* snapshot: later changes to cnp->cn_flags are not reflected here */
	vnode_t newvp;
	nfsnode_t dnp, np;
	struct nfsmount *nmp;
	mount_t mp;
	/* lockerror == 0 means we currently hold dnp's lock and must drop it at error_return */
	int nfsvers, error, lockerror = ENOENT, isdot, isdotdot, negnamecache;
	u_int64_t xid;
	struct nfs_vattr nvattr;
	int ngflags;
	struct vnop_access_args naa;
	fhandle_t fh;
	struct nfsreq rq, *req = &rq;

	*vpp = NULLVP;

	dnp = VTONFS(dvp);

	mp = vnode_mount(dvp);
	nmp = VFSTONFS(mp);
	if (!nmp) {
		error = ENXIO;
		goto error_return;
	}
	nfsvers = nmp->nm_vers;
	negnamecache = !(nmp->nm_flag & NFSMNT_NONEGNAMECACHE);

	/* lock the directory node and refresh its attributes */
	error = lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE);
	if (!error)
		error = nfs_getattr(dnp, &nvattr, ctx, 1);
	if (error)
		goto error_return;
	if (NFS_CHANGED_NC(nfsvers, dnp, &nvattr)) {
		/*
		 * This directory has changed on us.
		 * Purge any name cache entries.
		 */
		dnp->n_flag &= ~NNEGNCENTRIES;
		cache_purge(dvp);
		NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &nvattr);
	}

	error = cache_lookup(dvp, vpp, cnp);
	switch (error) {
	case ENOENT:
		/* negative cache entry */
		goto error_return;
	case 0:
		/* cache miss */
		break;
	case -1:
		/* cache hit, not really an error */
		OSAddAtomic(1, (SInt32*)&nfsstats.lookupcache_hits);

		/* the directory lock is not held across the access check */
		nfs_unlock(dnp);
		lockerror = ENOENT;

		/* check for directory access */
		naa.a_vp = dvp;
		naa.a_action = KAUTH_VNODE_SEARCH;
		naa.a_context = ctx;

		/* compute actual success/failure based on accessibility */
		error = nfs_vnop_access(&naa);
		/* FALLTHROUGH */
	default:
		/* unexpected error from cache_lookup */
		/* (also the exit for a cache hit: *vpp is put at error_return if the access check failed) */
		goto error_return;
	}

	/* skip lookup, if we know who we are: "." or ".." */
	isdot = isdotdot = 0;
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1)
			isdot = 1;
		if ((cnp->cn_namelen == 2) && (cnp->cn_nameptr[1] == '.'))
			isdotdot = 1;
	}
	if (isdotdot || isdot) {
		/* zero fh_len marks "no RPC reply", so xid/nvattr are not consulted below */
		fh.fh_len = 0;
		goto found;
	}

	/* do we know this name is too long? */
	nmp = VTONMP(dvp);
	if (!nmp) {
		error = ENXIO;
		goto error_return;
	}
	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
	     (cnp->cn_namelen > (long)nmp->nm_fsattr.nfsa_maxname)) {
		error = ENAMETOOLONG;
		goto error_return;
	}

	error = 0;
	newvp = NULLVP;

	OSAddAtomic(1, (SInt32*)&nfsstats.lookupcache_misses);

	/* issue the lookup RPC and collect its result (file handle + attributes) */
	error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &req);
	nfsmout_if(error);
	error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, ctx, req, &xid, &fh, &nvattr);
	nfsmout_if(error);

	/* is the file handle the same as this directory's file handle? */
	isdot = NFS_CMPFH(dnp, fh.fh_data, fh.fh_len);

found:

	if (flags & ISLASTCN) {
		switch (cnp->cn_nameiop) {
		case DELETE:
			/* don't add a name cache entry for a name that is about to go away */
			cnp->cn_flags &= ~MAKEENTRY;
			break;
		case RENAME:
			cnp->cn_flags &= ~MAKEENTRY;
			/* the directory itself cannot be the last component of a rename */
			if (isdot) {
				error = EISDIR;
				goto error_return;
			}
			break;
		}
	}

	if (isdotdot) {
		/* "..": drop the directory lock and return the parent vnode */
		nfs_unlock(dnp);
		lockerror = ENOENT;
		newvp = vnode_getparent(dvp);
		if (!newvp) {
			error = ENOENT;
			goto error_return;
		}
	} else if (isdot) {
		/* ".": return the directory itself with an extra reference */
		error = vnode_get(dvp);
		if (error)
			goto error_return;
		newvp = dvp;
		/* if an RPC was made and its reply isn't older than what we have, load its attributes */
		if (fh.fh_len && (dnp->n_xid <= xid))
			nfs_loadattrcache(dnp, &nvattr, &xid, 0);
	} else {
		/* any other name: get (or create) the node for the returned file handle */
		ngflags = (cnp->cn_flags & MAKEENTRY) ? NG_MAKEENTRY : 0;
		error = nfs_nget(mp, dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, ngflags, &np);
		if (error)
			goto error_return;
		newvp = NFSTOV(np);
		/* unlock the node handed back by nfs_nget() */
		nfs_unlock(np);
	}
	*vpp = newvp;

nfsmout:
	if (error) {
		/* a missing last component is what CREATE/RENAME expect */
		if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
		    (flags & ISLASTCN) && (error == ENOENT)) {
			if (vnode_mount(dvp) && vnode_vfsisrdonly(dvp))
				error = EROFS;
			else
				error = EJUSTRETURN;
		}
	}
	if ((error == ENOENT) && (cnp->cn_flags & MAKEENTRY) &&
	    (cnp->cn_nameiop != CREATE) && negnamecache) {
		/* add a negative entry in the name cache */
		cache_enter(dvp, NULL, cnp);
		dnp->n_flag |= NNEGNCENTRIES;
	}
error_return:
	/* drop the directory lock if we still hold it */
	if (!lockerror)
		nfs_unlock(dnp);
	/* never hand back a vnode together with an error */
	if (error && *vpp) {
	        vnode_put(*vpp);
		*vpp = NULLVP;
	}
	return (error);
}
1555
1556/*
1557 * NFS read call.
1558 * Just call nfs_bioread() to do the work.
1559 */
1560static int
1561nfs_vnop_read(
1562	struct vnop_read_args /* {
1563		struct vnodeop_desc *a_desc;
1564		vnode_t a_vp;
1565		struct uio *a_uio;
1566		int a_ioflag;
1567		vfs_context_t a_context;
1568	} */ *ap)
1569{
1570	if (vnode_vtype(ap->a_vp) != VREG)
1571		return (EPERM);
1572	return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, NULL, ap->a_context));
1573}
1574
1575
1576/*
1577 * NFS readlink call
1578 */
1579static int
1580nfs_vnop_readlink(
1581	struct vnop_readlink_args /* {
1582		struct vnodeop_desc *a_desc;
1583		vnode_t a_vp;
1584		struct uio *a_uio;
1585		vfs_context_t a_context;
1586	} */ *ap)
1587{
1588	vfs_context_t ctx = ap->a_context;
1589	nfsnode_t np = VTONFS(ap->a_vp);
1590	struct nfsmount *nmp;
1591	int error = 0, lockerror, nfsvers, changed = 0, n;
1592	uint32_t buflen;
1593	struct uio *uio = ap->a_uio;
1594	struct nfs_vattr nvattr;
1595	struct nfsbuf *bp = NULL;
1596
1597	if (vnode_vtype(ap->a_vp) != VLNK)
1598		return (EPERM);
1599
1600	if (uio_uio_resid(uio) == 0)
1601		return (0);
1602	if (uio->uio_offset < 0)
1603		return (EINVAL);
1604
1605	nmp = VTONMP(ap->a_vp);
1606	if (!nmp)
1607		return (ENXIO);
1608	nfsvers = nmp->nm_vers;
1609
1610	error = lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
1611	if (!error)
1612		error = nfs_getattr(np, &nvattr, ctx, 1);
1613	if (error) {
1614		if (!lockerror)
1615			nfs_unlock(np);
1616		FSDBG(531, np, 0xd1e0001, 0, error);
1617		return (error);
1618	}
1619	if (NFS_CHANGED(nfsvers, np, &nvattr)) {
1620		/* link changed, so just ignore NB_CACHE */
1621		changed = 1;
1622		NFS_CHANGED_UPDATE(nfsvers, np, &nvattr);
1623	}
1624	nfs_unlock(np);
1625
1626	OSAddAtomic(1, (SInt32*)&nfsstats.biocache_readlinks);
1627	error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_READ, &bp);
1628	if (error) {
1629		FSDBG(531, np, 0xd1e0002, 0, error);
1630		return (error);
1631	}
1632	if (changed)
1633		CLR(bp->nb_flags, NB_CACHE);
1634	if (!ISSET(bp->nb_flags, NB_CACHE)) {
1635		SET(bp->nb_flags, NB_READ);
1636		CLR(bp->nb_flags, NB_DONE);
1637		OSAddAtomic(1, (SInt32*)&nfsstats.readlink_bios);
1638		buflen = bp->nb_bufsize;
1639		error = nmp->nm_funcs->nf_readlink_rpc(np, bp->nb_data, &buflen, ctx);
1640		if (error) {
1641			SET(bp->nb_flags, NB_ERROR);
1642			bp->nb_error = error;
1643		} else {
1644			bp->nb_validoff = 0;
1645			bp->nb_validend = buflen;
1646		}
1647		nfs_buf_iodone(bp);
1648	}
1649	if (!error) {
1650		// LP64todo - fix this!
1651		n = min(uio_uio_resid(uio), bp->nb_validend);
1652		if (n > 0)
1653			error = uiomove(bp->nb_data, n, uio);
1654	}
1655	FSDBG(531, np, bp->nb_validend, 0, error);
1656	nfs_buf_release(bp, 1);
1657	return (error);
1658}
1659
1660/*
1661 * Do a readlink RPC.
1662 */
1663int
1664nfs3_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
1665{
1666	struct nfsmount *nmp;
1667	int error = 0, lockerror = ENOENT, nfsvers, status;
1668	uint32_t len;
1669	u_int64_t xid;
1670	struct nfsm_chain nmreq, nmrep;
1671
1672	nmp = NFSTONMP(np);
1673	if (!nmp)
1674		return (ENXIO);
1675	nfsvers = nmp->nm_vers;
1676	nfsm_chain_null(&nmreq);
1677	nfsm_chain_null(&nmrep);
1678
1679	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
1680	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
1681	nfsm_chain_build_done(error, &nmreq);
1682	nfsmout_if(error);
1683	error = nfs_request(np, NULL, &nmreq, NFSPROC_READLINK, ctx,
1684			&nmrep, &xid, &status);
1685	if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
1686		error = lockerror;
1687	if (nfsvers == NFS_VER3)
1688		nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
1689	if (!error)
1690		error = status;
1691	nfsm_chain_get_32(error, &nmrep, len);
1692	nfsmout_if(error);
1693	if ((nfsvers == NFS_VER2) && (len > *buflenp)) {
1694		error = EBADRPC;
1695		goto nfsmout;
1696	}
1697	if (len >= *buflenp) {
1698		if (np->n_size && (np->n_size < *buflenp))
1699			len = np->n_size;
1700		else
1701			len = *buflenp - 1;
1702	}
1703	nfsm_chain_get_opaque(error, &nmrep, len, buf);
1704	if (!error)
1705		*buflenp = len;
1706nfsmout:
1707	if (!lockerror)
1708		nfs_unlock(np);
1709	nfsm_chain_cleanup(&nmreq);
1710	nfsm_chain_cleanup(&nmrep);
1711	return (error);
1712}
1713
1714/*
1715 * NFS read RPC call
1716 * Ditto above
1717 */
1718int
1719nfs_read_rpc(nfsnode_t np, struct uio *uiop, vfs_context_t ctx)
1720{
1721	struct nfsmount *nmp;
1722	int error = 0, nfsvers, eof = 0;
1723	size_t nmrsize, len, retlen, tsiz;
1724	off_t txoffset;
1725	struct nfsreq rq, *req = &rq;
1726
1727	FSDBG_TOP(536, np, uiop->uio_offset, uio_uio_resid(uiop), 0);
1728	nmp = NFSTONMP(np);
1729	if (!nmp)
1730		return (ENXIO);
1731	nfsvers = nmp->nm_vers;
1732	nmrsize = nmp->nm_rsize;
1733
1734	// LP64todo - fix this
1735	tsiz = uio_uio_resid(uiop);
1736	if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && (nfsvers == NFS_VER2)) {
1737		FSDBG_BOT(536, np, uiop->uio_offset, uio_uio_resid(uiop), EFBIG);
1738		return (EFBIG);
1739	}
1740
1741	txoffset = uiop->uio_offset;
1742
1743	while (tsiz > 0) {
1744		len = retlen = (tsiz > nmrsize) ? nmrsize : tsiz;
1745		FSDBG(536, np, txoffset, len, 0);
1746		error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, len,
1747				vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
1748		if (!error)
1749			error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, uiop, &retlen, &eof);
1750		if (error)
1751			break;
1752		txoffset += retlen;
1753		tsiz -= retlen;
1754		if (nfsvers != NFS_VER2) {
1755			if (eof || (retlen == 0))
1756				tsiz = 0;
1757		} else if (retlen < len)
1758			tsiz = 0;
1759	}
1760
1761	FSDBG_BOT(536, np, eof, uio_uio_resid(uiop), error);
1762	return (error);
1763}
1764
1765int
1766nfs3_read_rpc_async(
1767	nfsnode_t np,
1768	off_t offset,
1769	size_t len,
1770	thread_t thd,
1771	kauth_cred_t cred,
1772	struct nfsreq_cbinfo *cb,
1773	struct nfsreq **reqp)
1774{
1775	struct nfsmount *nmp;
1776	int error = 0, nfsvers;
1777	struct nfsm_chain nmreq;
1778
1779	nmp = NFSTONMP(np);
1780	if (!nmp)
1781		return (ENXIO);
1782	nfsvers = nmp->nm_vers;
1783
1784	nfsm_chain_null(&nmreq);
1785	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers) + 3 * NFSX_UNSIGNED);
1786	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
1787	if (nfsvers == NFS_VER3) {
1788		nfsm_chain_add_64(error, &nmreq, offset);
1789		nfsm_chain_add_32(error, &nmreq, len);
1790	} else {
1791		nfsm_chain_add_32(error, &nmreq, offset);
1792		nfsm_chain_add_32(error, &nmreq, len);
1793		nfsm_chain_add_32(error, &nmreq, 0);
1794	}
1795	nfsm_chain_build_done(error, &nmreq);
1796	nfsmout_if(error);
1797	error = nfs_request_async(np, NULL, &nmreq, NFSPROC_READ, thd, cred, cb, reqp);
1798nfsmout:
1799	nfsm_chain_cleanup(&nmreq);
1800	return (error);
1801}
1802
1803int
1804nfs3_read_rpc_async_finish(
1805	nfsnode_t np,
1806	struct nfsreq *req,
1807	struct uio *uiop,
1808	size_t *lenp,
1809	int *eofp)
1810{
1811	int error = 0, lockerror, nfsvers, status, eof = 0;
1812	size_t retlen = 0;
1813	uint64_t xid;
1814	struct nfsmount *nmp;
1815	struct nfsm_chain nmrep;
1816
1817	nmp = NFSTONMP(np);
1818	if (!nmp) {
1819		nfs_request_async_cancel(req);
1820		return (ENXIO);
1821	}
1822	nfsvers = nmp->nm_vers;
1823
1824	nfsm_chain_null(&nmrep);
1825
1826	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
1827	if (error == EINPROGRESS) /* async request restarted */
1828		return (error);
1829
1830	if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
1831		error = lockerror;
1832	if (nfsvers == NFS_VER3)
1833		nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
1834	if (!error)
1835		error = status;
1836	if (nfsvers == NFS_VER3) {
1837		nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
1838		nfsm_chain_get_32(error, &nmrep, eof);
1839	} else {
1840		nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
1841	}
1842	if (!lockerror)
1843		nfs_unlock(np);
1844	nfsm_chain_get_32(error, &nmrep, retlen);
1845	if ((nfsvers == NFS_VER2) && (retlen > *lenp))
1846		error = EBADRPC;
1847	nfsmout_if(error);
1848	error = nfsm_chain_get_uio(&nmrep, MIN(retlen, *lenp), uiop);
1849	if (eofp) {
1850		if (nfsvers == NFS_VER3) {
1851			if (!eof && !retlen)
1852				eof = 1;
1853		} else if (retlen < *lenp) {
1854			eof = 1;
1855		}
1856		*eofp = eof;
1857	}
1858	*lenp = MIN(retlen, *lenp);
1859nfsmout:
1860	nfsm_chain_cleanup(&nmrep);
1861	return (error);
1862}
1863
1864/*
1865 * NFS write call
1866 */
1867int
1868nfs_vnop_write(
1869	struct vnop_write_args /* {
1870		struct vnodeop_desc *a_desc;
1871		vnode_t a_vp;
1872		struct uio *a_uio;
1873		int a_ioflag;
1874		vfs_context_t a_context;
1875	} */ *ap)
1876{
1877	vfs_context_t ctx = ap->a_context;
1878	struct uio *uio = ap->a_uio;
1879	vnode_t vp = ap->a_vp;
1880	nfsnode_t np = VTONFS(vp);
1881	int ioflag = ap->a_ioflag;
1882	struct nfsbuf *bp;
1883	struct nfs_vattr nvattr;
1884	struct nfsmount *nmp = VTONMP(vp);
1885	daddr64_t lbn;
1886	int biosize;
1887	int n, on, error = 0;
1888	off_t boff, start, end;
1889	struct iovec_32 iov;
1890	struct uio auio;
1891	thread_t thd;
1892	kauth_cred_t cred;
1893
1894	FSDBG_TOP(515, np, uio->uio_offset, uio_uio_resid(uio), ioflag);
1895
1896	if (vnode_vtype(vp) != VREG) {
1897		FSDBG_BOT(515, np, uio->uio_offset, uio_uio_resid(uio), EIO);
1898		return (EIO);
1899	}
1900
1901	thd = vfs_context_thread(ctx);
1902	cred = vfs_context_ucred(ctx);
1903
1904	nfs_data_lock(np, NFS_NODE_LOCK_SHARED);
1905
1906	if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) {
1907		nfs_data_unlock(np);
1908		FSDBG_BOT(515, np, uio->uio_offset, uio_uio_resid(uio), error);
1909		return (error);
1910	}
1911	np->n_wrbusy++;
1912
1913	if (np->n_flag & NWRITEERR) {
1914		error = np->n_error;
1915		np->n_flag &= ~NWRITEERR;
1916	}
1917	if (np->n_flag & NNEEDINVALIDATE) {
1918		np->n_flag &= ~NNEEDINVALIDATE;
1919		nfs_unlock(np);
1920		nfs_data_unlock(np);
1921		nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1);
1922		nfs_data_lock(np, NFS_NODE_LOCK_SHARED);
1923		if (error || ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))))
1924			goto out;
1925	}
1926	if (error) {
1927		nfs_unlock(np);
1928		goto out;
1929	}
1930
1931	biosize = nmp->nm_biosize;
1932
1933	if (ioflag & (IO_APPEND | IO_SYNC)) {
1934		if (np->n_flag & NMODIFIED) {
1935			NATTRINVALIDATE(np);
1936			nfs_unlock(np);
1937			nfs_data_unlock(np);
1938			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
1939			nfs_data_lock(np, NFS_NODE_LOCK_SHARED);
1940			if (error || ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))) {
1941				FSDBG(515, np, uio->uio_offset, 0x10bad01, error);
1942				goto out;
1943			}
1944		}
1945		if (ioflag & IO_APPEND) {
1946			NATTRINVALIDATE(np);
1947			nfs_unlock(np);
1948			nfs_data_unlock(np);
1949			error = nfs_getattr(np, &nvattr, ctx, 0);
1950			/* we'll be extending the file, so take the data lock exclusive */
1951			nfs_data_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
1952			if (error || ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))) {
1953				FSDBG(515, np, uio->uio_offset, 0x10bad02, error);
1954				goto out;
1955			}
1956			uio->uio_offset = np->n_size;
1957		}
1958	}
1959	if (uio->uio_offset < 0) {
1960		nfs_unlock(np);
1961		error = EINVAL;
1962		FSDBG_BOT(515, np, uio->uio_offset, 0xbad0ff, error);
1963		goto out;
1964	}
1965	if (uio_uio_resid(uio) == 0) {
1966		nfs_unlock(np);
1967		goto out;
1968	}
1969
1970	nfs_unlock(np);
1971
1972	if (((uio->uio_offset + uio_uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) {
1973		/* it looks like we'll be extending the file, so take the data lock exclusive */
1974		nfs_data_unlock(np);
1975		nfs_data_lock(np, NFS_NODE_LOCK_EXCLUSIVE);
1976	}
1977
1978	do {
1979		OSAddAtomic(1, (SInt32*)&nfsstats.biocache_writes);
1980		lbn = uio->uio_offset / biosize;
1981		on = uio->uio_offset % biosize;
1982		// LP64todo - fix this
1983		n = min((unsigned)(biosize - on), uio_uio_resid(uio));
1984again:
1985		/*
1986		 * Get a cache block for writing.  The range to be written is
1987		 * (off..off+n) within the block.  We ensure that the block
1988		 * either has no dirty region or that the given range is
1989		 * contiguous with the existing dirty region.
1990		 */
1991		error = nfs_buf_get(np, lbn, biosize, thd, NBLK_WRITE, &bp);
1992		if (error)
1993			goto out;
1994		/* map the block because we know we're going to write to it */
1995		NFS_BUF_MAP(bp);
1996
1997		if (ioflag & IO_NOCACHE)
1998			SET(bp->nb_flags, NB_NOCACHE);
1999
2000		if (!IS_VALID_CRED(bp->nb_wcred)) {
2001			kauth_cred_ref(cred);
2002			bp->nb_wcred = cred;
2003		}
2004
2005		/*
2006		 * If there's already a dirty range AND dirty pages in this block we
2007		 * need to send a commit AND write the dirty pages before continuing.
2008		 *
2009		 * If there's already a dirty range OR dirty pages in this block
2010		 * and the new write range is not contiguous with the existing range,
2011		 * then force the buffer to be written out now.
2012		 * (We used to just extend the dirty range to cover the valid,
2013		 * but unwritten, data in between also.  But writing ranges
2014		 * of data that weren't actually written by an application
2015		 * risks overwriting some other client's data with stale data
2016		 * that's just masquerading as new written data.)
2017		 */
2018		if (bp->nb_dirtyend > 0) {
2019		    if (on > bp->nb_dirtyend || (on + n) < bp->nb_dirtyoff || bp->nb_dirty) {
2020			FSDBG(515, np, uio->uio_offset, bp, 0xd15c001);
2021			/* write/commit buffer "synchronously" */
2022			/* (NB_STABLE indicates that data writes should be FILESYNC) */
2023			CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
2024			SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
2025			error = nfs_buf_write(bp);
2026			if (error)
2027			    goto out;
2028			goto again;
2029		    }
2030		} else if (bp->nb_dirty) {
2031		    int firstpg, lastpg;
2032		    u_int32_t pagemask;
2033		    /* calculate write range pagemask */
2034		    firstpg = on/PAGE_SIZE;
2035		    lastpg = (on+n-1)/PAGE_SIZE;
2036		    pagemask = ((1 << (lastpg+1)) - 1) & ~((1 << firstpg) - 1);
2037		    /* check if there are dirty pages outside the write range */
2038		    if (bp->nb_dirty & ~pagemask) {
2039			FSDBG(515, np, uio->uio_offset, bp, 0xd15c002);
2040			/* write/commit buffer "synchronously" */
2041			/* (NB_STABLE indicates that data writes should be FILESYNC) */
2042			CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
2043			SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
2044			error = nfs_buf_write(bp);
2045			if (error)
2046			    goto out;
2047			goto again;
2048		    }
2049		    /* if the first or last pages are already dirty */
2050		    /* make sure that the dirty range encompasses those pages */
2051		    if (NBPGDIRTY(bp,firstpg) || NBPGDIRTY(bp,lastpg)) {
2052			FSDBG(515, np, uio->uio_offset, bp, 0xd15c003);
2053		    	bp->nb_dirtyoff = min(on, firstpg * PAGE_SIZE);
2054			if (NBPGDIRTY(bp,lastpg)) {
2055			    bp->nb_dirtyend = (lastpg+1) * PAGE_SIZE;
2056			    /* clip to EOF */
2057			    if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
2058				    bp->nb_dirtyend = np->n_size - NBOFF(bp);
2059				    if (bp->nb_dirtyoff >= bp->nb_dirtyend)
2060					    bp->nb_dirtyoff = bp->nb_dirtyend = 0;
2061			    }
2062			} else
2063			    bp->nb_dirtyend = on+n;
2064		    }
2065		}
2066
2067		/*
2068		 * Are we extending the size of the file with this write?
2069		 * If so, update file size now that we have the block.
2070		 * If there was a partial buf at the old eof, validate
2071		 * and zero the new bytes.
2072		 */
2073		if ((uio->uio_offset + n) > (off_t)np->n_size) {
2074			struct nfsbuf *eofbp = NULL;
2075			daddr64_t eofbn = np->n_size / biosize;
2076			int eofoff = np->n_size % biosize;
2077			int neweofoff = (uio->uio_offset + n) % biosize;
2078
2079			FSDBG(515, 0xb1ffa000, uio->uio_offset + n, eofoff, neweofoff);
2080
2081			if (eofoff && (eofbn < lbn) &&
2082			    ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE|NBLK_ONLYVALID, &eofbp))))
2083				goto out;
2084
2085			/* if we're extending within the same last block */
2086			/* and the block is flagged as being cached... */
2087			if ((lbn == eofbn) && ISSET(bp->nb_flags, NB_CACHE)) {
2088				/* ...check that all pages in buffer are valid */
2089				int endpg = ((neweofoff ? neweofoff : biosize) - 1)/PAGE_SIZE;
2090				u_int32_t pagemask;
2091				/* pagemask only has to extend to last page being written to */
2092				pagemask = (1 << (endpg+1)) - 1;
2093				FSDBG(515, 0xb1ffa001, bp->nb_valid, pagemask, 0);
2094				if ((bp->nb_valid & pagemask) != pagemask) {
2095					/* zerofill any hole */
2096					if (on > bp->nb_validend) {
2097						int i;
2098						for (i=bp->nb_validend/PAGE_SIZE; i <= (on - 1)/PAGE_SIZE; i++)
2099							NBPGVALID_SET(bp, i);
2100						NFS_BUF_MAP(bp);
2101						FSDBG(516, bp, bp->nb_validend, on - bp->nb_validend, 0xf01e);
2102						bzero((char *)bp->nb_data + bp->nb_validend,
2103							on - bp->nb_validend);
2104					}
2105					/* zerofill any trailing data in the last page */
2106					if (neweofoff) {
2107						NFS_BUF_MAP(bp);
2108						FSDBG(516, bp, neweofoff, PAGE_SIZE - (neweofoff & PAGE_MASK), 0xe0f);
2109						bzero((char *)bp->nb_data + neweofoff,
2110							PAGE_SIZE - (neweofoff & PAGE_MASK));
2111					}
2112				}
2113			}
2114			np->n_size = uio->uio_offset + n;
2115			nfs_lock(np, NFS_NODE_LOCK_FORCE);
2116			CLR(np->n_flag, NUPDATESIZE);
2117			np->n_flag |= NMODIFIED;
2118			nfs_unlock(np);
2119			FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
2120			ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
2121			if (eofbp) {
2122				/*
2123				 * We may need to zero any previously invalid data
2124				 * after the old EOF in the previous EOF buffer.
2125				 *
2126				 * For the old last page, don't zero bytes if there
2127				 * are invalid bytes in that page (i.e. the page isn't
2128				 * currently valid).
2129				 * For pages after the old last page, zero them and
2130				 * mark them as valid.
2131				 */
2132				char *d;
2133				int i;
2134				if (ioflag & IO_NOCACHE)
2135					SET(eofbp->nb_flags, NB_NOCACHE);
2136				NFS_BUF_MAP(eofbp);
2137				FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e);
2138				d = eofbp->nb_data;
2139				i = eofoff/PAGE_SIZE;
2140				while (eofoff < biosize) {
2141					int poff = eofoff & PAGE_MASK;
2142					if (!poff || NBPGVALID(eofbp,i)) {
2143						bzero(d + eofoff, PAGE_SIZE - poff);
2144						NBPGVALID_SET(eofbp, i);
2145					}
2146					if (bp->nb_validend == eofoff)
2147						bp->nb_validend += PAGE_SIZE - poff;
2148					eofoff += PAGE_SIZE - poff;
2149					i++;
2150				}
2151				nfs_buf_release(eofbp, 1);
2152			}
2153		}
2154		/*
2155		 * If dirtyend exceeds file size, chop it down.  This should
2156		 * not occur unless there is a race.
2157		 */
2158		if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
2159			bp->nb_dirtyend = np->n_size - NBOFF(bp);
2160			if (bp->nb_dirtyoff >= bp->nb_dirtyend)
2161				bp->nb_dirtyoff = bp->nb_dirtyend = 0;
2162		}
2163		/*
2164		 * UBC doesn't handle partial pages, so we need to make sure
2165		 * that any pages left in the page cache are completely valid.
2166		 *
2167		 * Writes that are smaller than a block are delayed if they
2168		 * don't extend to the end of the block.
2169		 *
2170		 * If the block isn't (completely) cached, we may need to read
2171		 * in some parts of pages that aren't covered by the write.
2172		 * If the write offset (on) isn't page aligned, we'll need to
2173		 * read the start of the first page being written to.  Likewise,
2174		 * if the offset of the end of the write (on+n) isn't page aligned,
2175		 * we'll need to read the end of the last page being written to.
2176		 *
2177		 * Notes:
2178		 * We don't want to read anything we're just going to write over.
2179		 * We don't want to issue multiple I/Os if we don't have to
2180		 *   (because they're synchronous rpcs).
2181		 * We don't want to read anything we already have modified in the
2182		 *   page cache.
2183		 */
2184		if (!ISSET(bp->nb_flags, NB_NOCACHE) && !ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
2185			int firstpg, lastpg, dirtypg;
2186			int firstpgoff, lastpgoff;
2187			start = end = -1;
2188			firstpg = on/PAGE_SIZE;
2189			firstpgoff = on & PAGE_MASK;
2190			lastpg = (on+n-1)/PAGE_SIZE;
2191			lastpgoff = (on+n) & PAGE_MASK;
2192			if (firstpgoff && !NBPGVALID(bp,firstpg)) {
2193				/* need to read start of first page */
2194				start = firstpg * PAGE_SIZE;
2195				end = start + firstpgoff;
2196			}
2197			if (lastpgoff && !NBPGVALID(bp,lastpg)) {
2198				/* need to read end of last page */
2199				if (start < 0)
2200					start = (lastpg * PAGE_SIZE) + lastpgoff;
2201				end = (lastpg + 1) * PAGE_SIZE;
2202			}
2203			if (end > start) {
2204				/* need to read the data in range: start...end-1 */
2205
2206				/* first, check for dirty pages in between */
2207				/* if there are, we'll have to do two reads because */
2208				/* we don't want to overwrite the dirty pages. */
2209				for (dirtypg=start/PAGE_SIZE; dirtypg <= (end-1)/PAGE_SIZE; dirtypg++)
2210					if (NBPGDIRTY(bp,dirtypg))
2211						break;
2212
2213				/* if start is at beginning of page, try */
2214				/* to get any preceeding pages as well. */
2215				if (!(start & PAGE_MASK)) {
2216					/* stop at next dirty/valid page or start of block */
2217					for (; start > 0; start-=PAGE_SIZE)
2218						if (NBPGVALID(bp,((start-1)/PAGE_SIZE)))
2219							break;
2220				}
2221
2222				NFS_BUF_MAP(bp);
2223				/* setup uio for read(s) */
2224				boff = NBOFF(bp);
2225				auio.uio_iovs.iov32p = &iov;
2226				auio.uio_iovcnt = 1;
2227#if 1   /* LP64todo - can't use new segment flags until the drivers are ready */
2228				auio.uio_segflg = UIO_SYSSPACE;
2229#else
2230				auio.uio_segflg = UIO_SYSSPACE32;
2231#endif
2232				auio.uio_rw = UIO_READ;
2233
2234				if (dirtypg <= (end-1)/PAGE_SIZE) {
2235					/* there's a dirty page in the way, so just do two reads */
2236					/* we'll read the preceding data here */
2237					auio.uio_offset = boff + start;
2238					iov.iov_len = on - start;
2239					uio_uio_resid_set(&auio, iov.iov_len);
2240					iov.iov_base = (uintptr_t) bp->nb_data + start;
2241					error = nfs_read_rpc(np, &auio, ctx);
2242					if (error) /* couldn't read the data, so treat buffer as NOCACHE */
2243						SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
2244					if (uio_uio_resid(&auio) > 0) {
2245						FSDBG(516, bp, (caddr_t)iov.iov_base - bp->nb_data, uio_uio_resid(&auio), 0xd00dee01);
2246						// LP64todo - fix this
2247						bzero((caddr_t)iov.iov_base, uio_uio_resid(&auio));
2248					}
2249					if (!error) {
2250						/* update validoff/validend if necessary */
2251						if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
2252							bp->nb_validoff = start;
2253						if ((bp->nb_validend < 0) || (bp->nb_validend < on))
2254							bp->nb_validend = on;
2255						if ((off_t)np->n_size > boff + bp->nb_validend)
2256							bp->nb_validend = min(np->n_size - (boff + start), biosize);
2257						/* validate any pages before the write offset */
2258						for (; start < on/PAGE_SIZE; start+=PAGE_SIZE)
2259							NBPGVALID_SET(bp, start/PAGE_SIZE);
2260					}
2261					/* adjust start to read any trailing data */
2262					start = on+n;
2263				}
2264
2265				/* if end is at end of page, try to */
2266				/* get any following pages as well. */
2267				if (!(end & PAGE_MASK)) {
2268					/* stop at next valid page or end of block */
2269					for (; end < biosize; end+=PAGE_SIZE)
2270						if (NBPGVALID(bp,end/PAGE_SIZE))
2271							break;
2272				}
2273
2274				if (((boff+start) >= (off_t)np->n_size) ||
2275				    ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
2276					/*
2277					 * Either this entire read is beyond the current EOF
2278					 * or the range that we won't be modifying (on+n...end)
2279					 * is all beyond the current EOF.
2280					 * No need to make a trip across the network to
2281					 * read nothing.  So, just zero the buffer instead.
2282					 */
2283					FSDBG(516, bp, start, end - start, 0xd00dee00);
2284					bzero(bp->nb_data + start, end - start);
2285					error = 0;
2286				} else if (!ISSET(bp->nb_flags, NB_NOCACHE)) {
2287					/* now we'll read the (rest of the) data */
2288					auio.uio_offset = boff + start;
2289					iov.iov_len = end - start;
2290					uio_uio_resid_set(&auio, iov.iov_len);
2291					iov.iov_base = (uintptr_t) (bp->nb_data + start);
2292					error = nfs_read_rpc(np, &auio, ctx);
2293					if (error) /* couldn't read the data, so treat buffer as NOCACHE */
2294						SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
2295					if (uio_uio_resid(&auio) > 0) {
2296						FSDBG(516, bp, (caddr_t)iov.iov_base - bp->nb_data, uio_uio_resid(&auio), 0xd00dee02);
2297						// LP64todo - fix this
2298						bzero((caddr_t)iov.iov_base, uio_uio_resid(&auio));
2299					}
2300				}
2301				if (!error) {
2302					/* update validoff/validend if necessary */
2303					if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
2304						bp->nb_validoff = start;
2305					if ((bp->nb_validend < 0) || (bp->nb_validend < end))
2306						bp->nb_validend = end;
2307					if ((off_t)np->n_size > boff + bp->nb_validend)
2308						bp->nb_validend = min(np->n_size - (boff + start), biosize);
2309					/* validate any pages before the write offset's page */
2310					for (; start < trunc_page_32(on); start+=PAGE_SIZE)
2311						NBPGVALID_SET(bp, start/PAGE_SIZE);
2312					/* validate any pages after the range of pages being written to */
2313					for (; (end - 1) > round_page_32(on+n-1); end-=PAGE_SIZE)
2314						NBPGVALID_SET(bp, (end-1)/PAGE_SIZE);
2315				}
2316				/* Note: pages being written to will be validated when written */
2317			}
2318		}
2319
2320		if (ISSET(bp->nb_flags, NB_ERROR)) {
2321			error = bp->nb_error;
2322			nfs_buf_release(bp, 1);
2323			goto out;
2324		}
2325
2326		nfs_lock(np, NFS_NODE_LOCK_FORCE);
2327		np->n_flag |= NMODIFIED;
2328		nfs_unlock(np);
2329
2330		NFS_BUF_MAP(bp);
2331		error = uiomove((char *)bp->nb_data + on, n, uio);
2332		if (error) {
2333			SET(bp->nb_flags, NB_ERROR);
2334			nfs_buf_release(bp, 1);
2335			goto out;
2336		}
2337
2338		/* validate any pages written to */
2339		start = on & ~PAGE_MASK;
2340		for (; start < on+n; start += PAGE_SIZE) {
2341			NBPGVALID_SET(bp, start/PAGE_SIZE);
2342			/*
2343			 * This may seem a little weird, but we don't actually set the
2344			 * dirty bits for writes.  This is because we keep the dirty range
2345			 * in the nb_dirtyoff/nb_dirtyend fields.  Also, particularly for
2346			 * delayed writes, when we give the pages back to the VM we don't
2347			 * want to keep them marked dirty, because when we later write the
2348			 * buffer we won't be able to tell which pages were written dirty
2349			 * and which pages were mmapped and dirtied.
2350			 */
2351		}
2352		if (bp->nb_dirtyend > 0) {
2353			bp->nb_dirtyoff = min(on, bp->nb_dirtyoff);
2354			bp->nb_dirtyend = max((on + n), bp->nb_dirtyend);
2355		} else {
2356			bp->nb_dirtyoff = on;
2357			bp->nb_dirtyend = on + n;
2358		}
2359		if (bp->nb_validend <= 0 || bp->nb_validend < bp->nb_dirtyoff ||
2360		    bp->nb_validoff > bp->nb_dirtyend) {
2361			bp->nb_validoff = bp->nb_dirtyoff;
2362			bp->nb_validend = bp->nb_dirtyend;
2363		} else {
2364			bp->nb_validoff = min(bp->nb_validoff, bp->nb_dirtyoff);
2365			bp->nb_validend = max(bp->nb_validend, bp->nb_dirtyend);
2366		}
2367		if (!ISSET(bp->nb_flags, NB_CACHE))
2368			nfs_buf_normalize_valid_range(np, bp);
2369
2370		/*
2371		 * Since this block is being modified, it must be written
2372		 * again and not just committed.
2373		 */
2374		if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
2375			nfs_lock(np, NFS_NODE_LOCK_FORCE);
2376			if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
2377				np->n_needcommitcnt--;
2378				CHECK_NEEDCOMMITCNT(np);
2379			}
2380			CLR(bp->nb_flags, NB_NEEDCOMMIT);
2381			nfs_unlock(np);
2382		}
2383
2384		if (ioflag & IO_SYNC) {
2385			error = nfs_buf_write(bp);
2386			if (error)
2387				goto out;
2388		} else if (((n + on) == biosize) || (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) {
2389			SET(bp->nb_flags, NB_ASYNC);
2390			error = nfs_buf_write(bp);
2391			if (error)
2392				goto out;
2393		} else {
2394			/* If the block wasn't already delayed: charge for the write */
2395			if (!ISSET(bp->nb_flags, NB_DELWRI)) {
2396				proc_t p = vfs_context_proc(ctx);
2397				if (p && p->p_stats)
2398					OSIncrementAtomic(&p->p_stats->p_ru.ru_oublock);
2399			}
2400			nfs_buf_write_delayed(bp);
2401		}
2402		if (np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS)
2403		        nfs_flushcommits(np, 1);
2404
2405	} while (uio_uio_resid(uio) > 0 && n > 0);
2406
2407out:
2408	nfs_lock(np, NFS_NODE_LOCK_FORCE);
2409	np->n_wrbusy--;
2410	nfs_unlock(np);
2411	nfs_data_unlock(np);
2412	FSDBG_BOT(515, np, uio->uio_offset, uio_uio_resid(uio), error);
2413	return (error);
2414}
2415
2416
2417/*
2418 * NFS write call
2419 */
2420int
2421nfs_write_rpc(
2422	nfsnode_t np,
2423	struct uio *uiop,
2424	vfs_context_t ctx,
2425	int *iomodep,
2426	uint64_t *wverfp)
2427{
2428	return nfs_write_rpc2(np, uiop, vfs_context_thread(ctx), vfs_context_ucred(ctx), iomodep, wverfp);
2429}
2430
2431int
2432nfs_write_rpc2(
2433	nfsnode_t np,
2434	struct uio *uiop,
2435	thread_t thd,
2436	kauth_cred_t cred,
2437	int *iomodep,
2438	uint64_t *wverfp)
2439{
2440	struct nfsmount *nmp;
2441	int error = 0, nfsvers, restart;
2442	int backup, wverfset, commit, committed;
2443	uint64_t wverf = 0, wverf2;
2444	size_t nmwsize, totalsize, tsiz, len, rlen;
2445	struct nfsreq rq, *req = &rq;
2446
2447#if DIAGNOSTIC
2448	/* XXX limitation based on need to back up uio on short write */
2449	if (uiop->uio_iovcnt != 1)
2450		panic("nfs3_write_rpc: iovcnt > 1");
2451#endif
2452	FSDBG_TOP(537, np, uiop->uio_offset, uio_uio_resid(uiop), *iomodep);
2453	nmp = NFSTONMP(np);
2454	if (!nmp)
2455		return (ENXIO);
2456	nfsvers = nmp->nm_vers;
2457	nmwsize = nmp->nm_wsize;
2458
2459	restart = wverfset = 0;
2460	committed = NFS_WRITE_FILESYNC;
2461
2462	// LP64todo - fix this
2463	totalsize = tsiz = uio_uio_resid(uiop);
2464	if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && (nfsvers == NFS_VER2)) {
2465		FSDBG_BOT(537, np, uiop->uio_offset, uio_uio_resid(uiop), EFBIG);
2466		return (EFBIG);
2467	}
2468
2469	while (tsiz > 0) {
2470		len = (tsiz > nmwsize) ? nmwsize : tsiz;
2471		FSDBG(537, np, uiop->uio_offset, len, 0);
2472		error = nmp->nm_funcs->nf_write_rpc_async(np, uiop, len, thd, cred, *iomodep, NULL, &req);
2473		if (!error)
2474			error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2);
2475		nmp = NFSTONMP(np);
2476		if (!nmp)
2477			error = ENXIO;
2478		if (error)
2479			break;
2480		if (nfsvers == NFS_VER2) {
2481			tsiz -= len;
2482			continue;
2483		}
2484
2485		/* check for a short write */
2486		if (rlen < len) {
2487			backup = len - rlen;
2488			uio_iov_base_add(uiop, -backup);
2489			uio_iov_len_add(uiop, backup);
2490			uiop->uio_offset -= backup;
2491			uio_uio_resid_add(uiop, backup);
2492			len = rlen;
2493		}
2494
2495		/* return lowest commit level returned */
2496		if (commit < committed)
2497			committed = commit;
2498
2499		tsiz -= len;
2500
2501		/* check write verifier */
2502		if (!wverfset) {
2503			wverf = wverf2;
2504			wverfset = 1;
2505		} else if (wverf != wverf2) {
2506			/* verifier changed, so we need to restart all the writes */
2507			if (++restart > 10) {
2508				/* give up after too many restarts */
2509				error = EIO;
2510				break;
2511			}
2512			backup = totalsize - tsiz;
2513			uio_iov_base_add(uiop, -backup);
2514			uio_iov_len_add(uiop, backup);
2515			uiop->uio_offset -= backup;
2516			uio_uio_resid_add(uiop, backup);
2517			committed = NFS_WRITE_FILESYNC;
2518			wverfset = 0;
2519			tsiz = totalsize;
2520		}
2521	}
2522	if (wverfset && wverfp)
2523		*wverfp = wverf;
2524	*iomodep = committed;
2525	if (error)
2526		uio_uio_resid_set(uiop, tsiz);
2527	FSDBG_BOT(537, np, committed, uio_uio_resid(uiop), error);
2528	return (error);
2529}
2530
2531int
2532nfs3_write_rpc_async(
2533	nfsnode_t np,
2534	struct uio *uiop,
2535	size_t len,
2536	thread_t thd,
2537	kauth_cred_t cred,
2538	int iomode,
2539	struct nfsreq_cbinfo *cb,
2540	struct nfsreq **reqp)
2541{
2542	struct nfsmount *nmp;
2543	int error = 0, nfsvers;
2544	off_t offset;
2545	struct nfsm_chain nmreq;
2546
2547	nmp = NFSTONMP(np);
2548	if (!nmp)
2549		return (ENXIO);
2550	nfsvers = nmp->nm_vers;
2551
2552	offset = uiop->uio_offset;
2553
2554	nfsm_chain_null(&nmreq);
2555	nfsm_chain_build_alloc_init(error, &nmreq,
2556		NFSX_FH(nfsvers) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
2557	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
2558	if (nfsvers == NFS_VER3) {
2559		nfsm_chain_add_64(error, &nmreq, offset);
2560		nfsm_chain_add_32(error, &nmreq, len);
2561		nfsm_chain_add_32(error, &nmreq, iomode);
2562	} else {
2563		nfsm_chain_add_32(error, &nmreq, 0);
2564		nfsm_chain_add_32(error, &nmreq, offset);
2565		nfsm_chain_add_32(error, &nmreq, 0);
2566	}
2567	nfsm_chain_add_32(error, &nmreq, len);
2568	nfsmout_if(error);
2569	error = nfsm_chain_add_uio(&nmreq, uiop, len);
2570	nfsm_chain_build_done(error, &nmreq);
2571	nfsmout_if(error);
2572	error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, cb, reqp);
2573nfsmout:
2574	nfsm_chain_cleanup(&nmreq);
2575	return (error);
2576}
2577
2578int
2579nfs3_write_rpc_async_finish(
2580	nfsnode_t np,
2581	struct nfsreq *req,
2582	int *iomodep,
2583	size_t *rlenp,
2584	uint64_t *wverfp)
2585{
2586	struct nfsmount *nmp;
2587	int error = 0, lockerror = ENOENT, nfsvers, status;
2588	int updatemtime = 0, wccpostattr = 0, rlen, committed = NFS_WRITE_FILESYNC;
2589	u_int64_t xid, wverf;
2590	mount_t mp;
2591	struct nfsm_chain nmrep;
2592
2593	nmp = NFSTONMP(np);
2594	if (!nmp) {
2595		nfs_request_async_cancel(req);
2596		return (ENXIO);
2597	}
2598	nfsvers = nmp->nm_vers;
2599
2600	nfsm_chain_null(&nmrep);
2601
2602	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2603	if (error == EINPROGRESS) /* async request restarted */
2604		return (error);
2605	nmp = NFSTONMP(np);
2606	if (!nmp)
2607		error = ENXIO;
2608	if (!error && (lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
2609		error = lockerror;
2610	if (nfsvers == NFS_VER3) {
2611		struct timespec premtime = { 0, 0 };
2612		nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
2613		if (nfstimespeccmp(&np->n_mtime, &premtime, ==))
2614			updatemtime = 1;
2615		if (!error)
2616			error = status;
2617		nfsm_chain_get_32(error, &nmrep, rlen);
2618		nfsmout_if(error);
2619		*rlenp = rlen;
2620		if (rlen <= 0)
2621			error = NFSERR_IO;
2622		nfsm_chain_get_32(error, &nmrep, committed);
2623		nfsm_chain_get_64(error, &nmrep, wverf);
2624		nfsmout_if(error);
2625		if (wverfp)
2626			*wverfp = wverf;
2627		lck_mtx_lock(&nmp->nm_lock);
2628		if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
2629			nmp->nm_verf = wverf;
2630			nmp->nm_state |= NFSSTA_HASWRITEVERF;
2631		} else if (nmp->nm_verf != wverf) {
2632			nmp->nm_verf = wverf;
2633		}
2634		lck_mtx_unlock(&nmp->nm_lock);
2635	} else {
2636		if (!error)
2637			error = status;
2638		nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
2639		nfsmout_if(error);
2640	}
2641	if (updatemtime)
2642		NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
2643nfsmout:
2644	if (!lockerror)
2645		nfs_unlock(np);
2646	nfsm_chain_cleanup(&nmrep);
2647	if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async &&
2648	    ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
2649		committed = NFS_WRITE_FILESYNC;
2650	*iomodep = committed;
2651	return (error);
2652}
2653
2654/*
2655 * NFS mknod vnode op
2656 *
2657 * For NFS v2 this is a kludge. Use a create RPC but with the IFMT bits of the
2658 * mode set to specify the file type and the size field for rdev.
2659 */
2660static int
2661nfs3_vnop_mknod(
2662	struct vnop_mknod_args /* {
2663		struct vnodeop_desc *a_desc;
2664		vnode_t a_dvp;
2665		vnode_t *a_vpp;
2666		struct componentname *a_cnp;
2667		struct vnode_attr *a_vap;
2668		vfs_context_t a_context;
2669	} */ *ap)
2670{
2671	vnode_t dvp = ap->a_dvp;
2672	vnode_t *vpp = ap->a_vpp;
2673	struct componentname *cnp = ap->a_cnp;
2674	struct vnode_attr *vap = ap->a_vap;
2675	vfs_context_t ctx = ap->a_context;
2676	vnode_t newvp = NULL;
2677	nfsnode_t np = NULL;
2678	struct nfsmount *nmp;
2679	nfsnode_t dnp = VTONFS(dvp);
2680	struct nfs_vattr nvattr, dnvattr;
2681	fhandle_t fh;
2682	int error = 0, lockerror = ENOENT, status, wccpostattr = 0;
2683	struct timespec premtime = { 0, 0 };
2684	u_long rdev;
2685	u_int64_t xid, dxid;
2686	int nfsvers, gotuid, gotgid;
2687	struct nfsm_chain nmreq, nmrep;
2688
2689	nmp = VTONMP(dvp);
2690	if (!nmp)
2691		return (ENXIO);
2692	nfsvers = nmp->nm_vers;
2693
2694	if (!VATTR_IS_ACTIVE(vap, va_type))
2695		return (EINVAL);
2696	if (vap->va_type == VCHR || vap->va_type == VBLK) {
2697		if (!VATTR_IS_ACTIVE(vap, va_rdev))
2698			return (EINVAL);
2699		rdev = vap->va_rdev;
2700	} else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
2701		rdev = 0xffffffff;
2702	else {
2703		return (ENOTSUP);
2704	}
2705	if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
2706		return (ENAMETOOLONG);
2707
2708	VATTR_SET_SUPPORTED(vap, va_mode);
2709	VATTR_SET_SUPPORTED(vap, va_uid);
2710	VATTR_SET_SUPPORTED(vap, va_gid);
2711	VATTR_SET_SUPPORTED(vap, va_data_size);
2712	VATTR_SET_SUPPORTED(vap, va_access_time);
2713	VATTR_SET_SUPPORTED(vap, va_modify_time);
2714	gotuid = VATTR_IS_ACTIVE(vap, va_uid);
2715	gotgid = VATTR_IS_ACTIVE(vap, va_gid);
2716
2717	nfsm_chain_null(&nmreq);
2718	nfsm_chain_null(&nmrep);
2719
2720	nfsm_chain_build_alloc_init(error, &nmreq,
2721		NFSX_FH(nfsvers) + 4 * NFSX_UNSIGNED +
2722		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
2723	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
2724	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
2725	if (nfsvers == NFS_VER3) {
2726		nfsm_chain_add_32(error, &nmreq, vtonfs_type(vap->va_type, nfsvers));
2727		nfsm_chain_add_v3sattr(error, &nmreq, vap);
2728		if (vap->va_type == VCHR || vap->va_type == VBLK) {
2729			nfsm_chain_add_32(error, &nmreq, major(vap->va_rdev));
2730			nfsm_chain_add_32(error, &nmreq, minor(vap->va_rdev));
2731		}
2732	} else {
2733		nfsm_chain_add_v2sattr(error, &nmreq, vap, rdev);
2734	}
2735	nfsm_chain_build_done(error, &nmreq);
2736	nfsmout_if(error);
2737	if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE)))
2738		error = lockerror;
2739	nfsmout_if(error);
2740
2741	error = nfs_request(dnp, NULL, &nmreq, NFSPROC_MKNOD, ctx, &nmrep, &xid, &status);
2742
2743	/* XXX no EEXIST kludge here? */
2744	dxid = xid;
2745	if (!error && !status) {
2746		if (dnp->n_flag & NNEGNCENTRIES) {
2747			dnp->n_flag &= ~NNEGNCENTRIES;
2748			cache_purge_negatives(dvp);
2749		}
2750		error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
2751	}
2752	if (nfsvers == NFS_VER3)
2753		nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
2754	if (!error)
2755		error = status;
2756nfsmout:
2757	nfsm_chain_cleanup(&nmreq);
2758	nfsm_chain_cleanup(&nmrep);
2759
2760	if (!lockerror) {
2761		dnp->n_flag |= NMODIFIED;
2762		/* if directory hadn't changed, update namecache mtime */
2763		if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
2764			NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
2765		if (!wccpostattr)
2766			NATTRINVALIDATE(dnp);
2767		if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) {
2768			if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) {
2769				dnp->n_flag &= ~NNEGNCENTRIES;
2770				cache_purge(dvp);
2771				NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr);
2772			}
2773		}
2774	}
2775
2776	if (!error && fh.fh_len)
2777		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
2778	if (!error && !np)
2779		error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
2780	if (!error && np)
2781		newvp = NFSTOV(np);
2782	if (!lockerror)
2783		nfs_unlock(dnp);
2784
2785	if (!error && (gotuid || gotgid) &&
2786	    (!newvp || nfs_getattrcache(np, &nvattr, 1) ||
2787	     (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
2788	     (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
2789		/* clear ID bits if server didn't use them (or we can't tell) */
2790		VATTR_CLEAR_SUPPORTED(vap, va_uid);
2791		VATTR_CLEAR_SUPPORTED(vap, va_gid);
2792	}
2793	if (error) {
2794		if (newvp) {
2795			nfs_unlock(np);
2796			vnode_put(newvp);
2797		}
2798	} else {
2799		*vpp = newvp;
2800		nfs_unlock(np);
2801	}
2802	return (error);
2803}
2804
2805static u_long create_verf;
2806/*
2807 * NFS file create call
2808 */
2809static int
2810nfs3_vnop_create(
2811	struct vnop_create_args /* {
2812		struct vnodeop_desc *a_desc;
2813		vnode_t a_dvp;
2814		vnode_t *a_vpp;
2815		struct componentname *a_cnp;
2816		struct vnode_attr *a_vap;
2817		vfs_context_t a_context;
2818	} */ *ap)
2819{
2820	vfs_context_t ctx = ap->a_context;
2821	vnode_t dvp = ap->a_dvp;
2822	struct vnode_attr *vap = ap->a_vap;
2823	struct componentname *cnp = ap->a_cnp;
2824	struct nfs_vattr nvattr, dnvattr;
2825	fhandle_t fh;
2826	nfsnode_t np = NULL;
2827	struct nfsmount *nmp;
2828	nfsnode_t dnp = VTONFS(dvp);
2829	vnode_t newvp = NULL;
2830	int error = 0, lockerror = ENOENT, status, wccpostattr = 0, fmode = 0;
2831	struct timespec premtime = { 0, 0 };
2832	int nfsvers, gotuid, gotgid;
2833	u_int64_t xid, dxid;
2834	uint32_t val;
2835	struct nfsm_chain nmreq, nmrep;
2836	struct nfsreq *req;
2837	struct nfs_dulookup dul;
2838
2839	nmp = VTONMP(dvp);
2840	if (!nmp)
2841		return (ENXIO);
2842	nfsvers = nmp->nm_vers;
2843
2844	if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
2845		return (ENAMETOOLONG);
2846
2847	VATTR_SET_SUPPORTED(vap, va_mode);
2848	VATTR_SET_SUPPORTED(vap, va_uid);
2849	VATTR_SET_SUPPORTED(vap, va_gid);
2850	VATTR_SET_SUPPORTED(vap, va_data_size);
2851	VATTR_SET_SUPPORTED(vap, va_access_time);
2852	VATTR_SET_SUPPORTED(vap, va_modify_time);
2853	gotuid = VATTR_IS_ACTIVE(vap, va_uid);
2854	gotgid = VATTR_IS_ACTIVE(vap, va_gid);
2855
2856	if (vap->va_vaflags & VA_EXCLUSIVE)
2857		fmode |= O_EXCL;
2858
2859again:
2860	req = NULL;
2861	nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen);
2862
2863	nfsm_chain_null(&nmreq);
2864	nfsm_chain_null(&nmrep);
2865
2866	nfsm_chain_build_alloc_init(error, &nmreq,
2867		NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
2868		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
2869	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
2870	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
2871	if (nfsvers == NFS_VER3) {
2872		if (fmode & O_EXCL) {
2873			nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE);
2874			if (!TAILQ_EMPTY(&in_ifaddrhead))
2875				val = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
2876			else
2877				val = create_verf;
2878			nfsm_chain_add_32(error, &nmreq, val);
2879			++create_verf;
2880			nfsm_chain_add_32(error, &nmreq, create_verf);
2881		} else {
2882			nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED);
2883			nfsm_chain_add_v3sattr(error, &nmreq, vap);
2884		}
2885	} else {
2886		nfsm_chain_add_v2sattr(error, &nmreq, vap, 0);
2887	}
2888	nfsm_chain_build_done(error, &nmreq);
2889	nfsmout_if(error);
2890	if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE)))
2891		error = lockerror;
2892	nfsmout_if(error);
2893
2894	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_CREATE,
2895			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
2896	if (!error) {
2897		nfs_dulookup_start(&dul, dnp, ctx);
2898		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
2899	}
2900
2901	dxid = xid;
2902	if (!error && !status) {
2903		if (dnp->n_flag & NNEGNCENTRIES) {
2904			dnp->n_flag &= ~NNEGNCENTRIES;
2905			cache_purge_negatives(dvp);
2906		}
2907		error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
2908	}
2909	if (nfsvers == NFS_VER3)
2910		nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
2911	if (!error)
2912		error = status;
2913nfsmout:
2914	nfsm_chain_cleanup(&nmreq);
2915	nfsm_chain_cleanup(&nmrep);
2916
2917	if (!lockerror) {
2918		dnp->n_flag |= NMODIFIED;
2919		/* if directory hadn't changed, update namecache mtime */
2920		if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
2921			NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
2922		if (!wccpostattr)
2923			NATTRINVALIDATE(dnp);
2924		if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) {
2925			if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) {
2926				dnp->n_flag &= ~NNEGNCENTRIES;
2927				cache_purge(dvp);
2928				NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr);
2929			}
2930		}
2931	}
2932
2933	if (!error && fh.fh_len)
2934		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
2935	if (!error && !np)
2936		error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
2937	if (!error && np)
2938		newvp = NFSTOV(np);
2939
2940	nfs_dulookup_finish(&dul, dnp, ctx);
2941	if (!lockerror)
2942		nfs_unlock(dnp);
2943
2944	if (error) {
2945		if ((nfsvers == NFS_VER3) && (fmode & O_EXCL) && (error == NFSERR_NOTSUPP)) {
2946			fmode &= ~O_EXCL;
2947			goto again;
2948		}
2949		if (newvp) {
2950			nfs_unlock(np);
2951			vnode_put(newvp);
2952		}
2953	} else if ((nfsvers == NFS_VER3) && (fmode & O_EXCL)) {
2954		error = nfs3_setattr_rpc(np, vap, ctx, 1);
2955		if (error && (gotuid || gotgid)) {
2956			/* it's possible the server didn't like our attempt to set IDs. */
2957			/* so, let's try it again without those */
2958			VATTR_CLEAR_ACTIVE(vap, va_uid);
2959			VATTR_CLEAR_ACTIVE(vap, va_gid);
2960			error = nfs3_setattr_rpc(np, vap, ctx, 1);
2961		}
2962		if (error) {
2963			nfs_unlock(np);
2964			vnode_put(newvp);
2965		}
2966	}
2967	if (!error)
2968		*ap->a_vpp = newvp;
2969	if (!error && (gotuid || gotgid) &&
2970	    (!newvp || nfs_getattrcache(np, &nvattr, 1) ||
2971	     (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
2972	     (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
2973		/* clear ID bits if server didn't use them (or we can't tell) */
2974		VATTR_CLEAR_SUPPORTED(vap, va_uid);
2975		VATTR_CLEAR_SUPPORTED(vap, va_gid);
2976	}
2977	if (!error)
2978		nfs_unlock(np);
2979	return (error);
2980}
2981
2982/*
2983 * NFS file remove call
2984 * To try and make NFS semantics closer to UFS semantics, a file that has
2985 * other processes using the vnode is renamed instead of removed and then
2986 * removed later on the last close.
2987 * - If vnode_isinuse()
2988 *	  If a rename is not already in the works
2989 *	     call nfs_sillyrename() to set it up
2990 *     else
2991 *	  do the remove RPC
2992 */
2993static int
2994nfs_vnop_remove(
2995	struct vnop_remove_args /* {
2996		struct vnodeop_desc *a_desc;
2997		vnode_t a_dvp;
2998		vnode_t a_vp;
2999		struct componentname *a_cnp;
3000		int a_flags;
3001		vfs_context_t a_context;
3002	} */ *ap)
3003{
3004	vfs_context_t ctx = ap->a_context;
3005	vnode_t vp = ap->a_vp;
3006	vnode_t dvp = ap->a_dvp;
3007	struct componentname *cnp = ap->a_cnp;
3008	nfsnode_t dnp = VTONFS(dvp);
3009	nfsnode_t np = VTONFS(vp);
3010	int error = 0, nfsvers, inuse, gotattr = 0, flushed = 0, setsize = 0;
3011	struct nfs_vattr nvattr;
3012	struct nfsmount *nmp;
3013	struct nfs_dulookup dul;
3014
3015	/* XXX prevent removing a sillyrenamed file? */
3016
3017	nmp = NFSTONMP(dnp);
3018	if (!nmp)
3019		return (ENXIO);
3020	nfsvers = nmp->nm_vers;
3021
3022again_relock:
3023	error = nfs_lock2(dnp, np, NFS_NODE_LOCK_EXCLUSIVE);
3024	if (error)
3025		return (error);
3026
3027	/* lock the node while we remove the file */
3028	lck_mtx_lock(nfs_node_hash_mutex);
3029	while (np->n_hflag & NHLOCKED) {
3030		np->n_hflag |= NHLOCKWANT;
3031		msleep(np, nfs_node_hash_mutex, PINOD, "nfs_remove", NULL);
3032	}
3033	np->n_hflag |= NHLOCKED;
3034	lck_mtx_unlock(nfs_node_hash_mutex);
3035
3036	nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen);
3037again:
3038	inuse = vnode_isinuse(vp, 0);
3039	if ((ap->a_flags & VNODE_REMOVE_NODELETEBUSY) && inuse) {
3040		/* Caller requested Carbon delete semantics, but file is busy */
3041		error = EBUSY;
3042		goto out;
3043	}
3044	if (inuse && !gotattr) {
3045		if (nfs_getattr(np, &nvattr, ctx, 1))
3046			nvattr.nva_nlink = 1;
3047		gotattr = 1;
3048		goto again;
3049	}
3050	if (!inuse || (np->n_sillyrename && (nvattr.nva_nlink > 1))) {
3051
3052		if (!inuse && !flushed) { /* flush all the buffers first */
3053			/* unlock the node */
3054			lck_mtx_lock(nfs_node_hash_mutex);
3055			np->n_hflag &= ~NHLOCKED;
3056			if (np->n_hflag & NHLOCKWANT) {
3057				np->n_hflag &= ~NHLOCKWANT;
3058				wakeup(np);
3059			}
3060			lck_mtx_unlock(nfs_node_hash_mutex);
3061			nfs_unlock2(dnp, np);
3062			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
3063			FSDBG(260, np, np->n_size, np->n_vattr.nva_size, 0xf00d0011);
3064			flushed = 1;
3065			if (error == EINTR) {
3066				nfs_lock(np, NFS_NODE_LOCK_FORCE);
3067				NATTRINVALIDATE(np);
3068				nfs_unlock(np);
3069				return (error);
3070			}
3071			goto again_relock;
3072		}
3073
3074		/*
3075		 * Purge the name cache so that the chance of a lookup for
3076		 * the name succeeding while the remove is in progress is
3077		 * minimized.
3078		 */
3079		cache_purge(vp);
3080
3081		nfs_dulookup_start(&dul, dnp, ctx);
3082
3083		/* Do the rpc */
3084		error = nmp->nm_funcs->nf_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
3085				vfs_context_thread(ctx), vfs_context_ucred(ctx));
3086
3087		/*
3088		 * Kludge City: If the first reply to the remove rpc is lost..
3089		 *   the reply to the retransmitted request will be ENOENT
3090		 *   since the file was in fact removed
3091		 *   Therefore, we cheat and return success.
3092		 */
3093		if (error == ENOENT)
3094			error = 0;
3095
3096		if (!error && !inuse && !np->n_sillyrename) {
3097			/*
3098			 * removal succeeded, it's not in use, and not silly renamed so
3099			 * remove nfsnode from hash now so we can't accidentally find it
3100			 * again if another object gets created with the same filehandle
3101			 * before this vnode gets reclaimed
3102			 */
3103			lck_mtx_lock(nfs_node_hash_mutex);
3104			if (np->n_hflag & NHHASHED) {
3105				LIST_REMOVE(np, n_hash);
3106				np->n_hflag &= ~NHHASHED;
3107				FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
3108			}
3109			lck_mtx_unlock(nfs_node_hash_mutex);
3110			/* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
3111			/* clear all flags other than these */
3112			np->n_flag &= (NMODIFIED);
3113			vnode_recycle(vp);
3114			NATTRINVALIDATE(np);
3115			setsize = 1;
3116		} else {
3117			NATTRINVALIDATE(np);
3118		}
3119	} else if (!np->n_sillyrename) {
3120		nfs_dulookup_start(&dul, dnp, ctx);
3121		error = nfs_sillyrename(dnp, np, cnp, ctx);
3122		NATTRINVALIDATE(np);
3123	} else {
3124		NATTRINVALIDATE(np);
3125		nfs_dulookup_start(&dul, dnp, ctx);
3126	}
3127
3128	if (!nfs_getattr(dnp, &nvattr, ctx, 1)) {
3129		if (NFS_CHANGED_NC(nfsvers, dnp, &nvattr)) {
3130			dnp->n_flag &= ~NNEGNCENTRIES;
3131			cache_purge(dvp);
3132			NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &nvattr);
3133		}
3134	}
3135	nfs_dulookup_finish(&dul, dnp, ctx);
3136out:
3137	/* unlock the node */
3138	lck_mtx_lock(nfs_node_hash_mutex);
3139	np->n_hflag &= ~NHLOCKED;
3140	if (np->n_hflag & NHLOCKWANT) {
3141		np->n_hflag &= ~NHLOCKWANT;
3142		wakeup(np);
3143	}
3144	lck_mtx_unlock(nfs_node_hash_mutex);
3145	nfs_unlock2(dnp, np);
3146	if (setsize)
3147		ubc_setsize(vp, 0);
3148	return (error);
3149}
3150
3151/*
3152 * NFS silly-renamed file removal function called from nfs_vnop_inactive
3153 */
3154int
3155nfs_removeit(struct nfs_sillyrename *nsp)
3156{
3157	struct nfsmount *nmp = NFSTONMP(nsp->nsr_dnp);
3158	if (!nmp)
3159		return (ENXIO);
3160	return nmp->nm_funcs->nf_remove_rpc(nsp->nsr_dnp, nsp->nsr_name, nsp->nsr_namlen, NULL, nsp->nsr_cred);
3161}
3162
3163/*
3164 * NFS remove rpc, called from nfs_remove() and nfs_removeit().
3165 */
3166int
3167nfs3_remove_rpc(
3168	nfsnode_t dnp,
3169	char *name,
3170	int namelen,
3171	thread_t thd,
3172	kauth_cred_t cred)
3173{
3174	int error = 0, status, wccpostattr = 0;
3175	struct timespec premtime = { 0, 0 };
3176	struct nfsmount *nmp;
3177	int nfsvers;
3178	u_int64_t xid;
3179	struct nfsm_chain nmreq, nmrep;
3180
3181	nmp = NFSTONMP(dnp);
3182	if (!nmp)
3183		return (ENXIO);
3184	nfsvers = nmp->nm_vers;
3185	if ((nfsvers == NFS_VER2) && (namelen > NFS_MAXNAMLEN))
3186		return (ENAMETOOLONG);
3187
3188	nfsm_chain_null(&nmreq);
3189	nfsm_chain_null(&nmrep);
3190
3191	nfsm_chain_build_alloc_init(error, &nmreq,
3192		NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
3193	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
3194	nfsm_chain_add_string(error, &nmreq, name, namelen);
3195	nfsm_chain_build_done(error, &nmreq);
3196	nfsmout_if(error);
3197
3198	error = nfs_request2(dnp, NULL, &nmreq, NFSPROC_REMOVE, thd, cred, 0, &nmrep, &xid, &status);
3199
3200	if (nfsvers == NFS_VER3)
3201		nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid);
3202	dnp->n_flag |= NMODIFIED;
3203	/* if directory hadn't changed, update namecache mtime */
3204	if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
3205		NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
3206	if (!wccpostattr)
3207		NATTRINVALIDATE(dnp);
3208	if (!error)
3209		error = status;
3210nfsmout:
3211	nfsm_chain_cleanup(&nmreq);
3212	nfsm_chain_cleanup(&nmrep);
3213	return (error);
3214}
3215
3216/*
3217 * NFS file rename call
3218 */
3219static int
3220nfs_vnop_rename(
3221	struct vnop_rename_args  /* {
3222		struct vnodeop_desc *a_desc;
3223		vnode_t a_fdvp;
3224		vnode_t a_fvp;
3225		struct componentname *a_fcnp;
3226		vnode_t a_tdvp;
3227		vnode_t a_tvp;
3228		struct componentname *a_tcnp;
3229		vfs_context_t a_context;
3230	} */ *ap)
3231{
3232	vfs_context_t ctx = ap->a_context;
3233	vnode_t fdvp = ap->a_fdvp;
3234	vnode_t fvp = ap->a_fvp;
3235	vnode_t tdvp = ap->a_tdvp;
3236	vnode_t tvp = ap->a_tvp;
3237	nfsnode_t fdnp, fnp, tdnp, tnp;
3238	struct componentname *tcnp = ap->a_tcnp;
3239	struct componentname *fcnp = ap->a_fcnp;
3240	int error, nfsvers, inuse=0, tvprecycle=0, locked=0;
3241	mount_t fmp, tdmp, tmp;
3242	struct nfs_vattr nvattr;
3243	struct nfsmount *nmp;
3244	struct nfs_dulookup fdul, tdul;
3245
3246	fdnp = VTONFS(fdvp);
3247	fnp = VTONFS(fvp);
3248	tdnp = VTONFS(tdvp);
3249	tnp = tvp ? VTONFS(tvp) : NULL;
3250
3251	nmp = NFSTONMP(fdnp);
3252	if (!nmp)
3253		return (ENXIO);
3254	nfsvers = nmp->nm_vers;
3255
3256	error = nfs_lock4(fdnp, fnp, tdnp, tnp, NFS_NODE_LOCK_EXCLUSIVE);
3257	if (error)
3258		return (error);
3259
3260	if (tvp && (tvp != fvp)) {
3261		/* lock the node while we rename over the existing file */
3262		lck_mtx_lock(nfs_node_hash_mutex);
3263		while (tnp->n_hflag & NHLOCKED) {
3264			tnp->n_hflag |= NHLOCKWANT;
3265			msleep(tnp, nfs_node_hash_mutex, PINOD, "nfs_rename", NULL);
3266		}
3267		tnp->n_hflag |= NHLOCKED;
3268		lck_mtx_unlock(nfs_node_hash_mutex);
3269		locked = 1;
3270	}
3271
3272	nfs_dulookup_init(&fdul, fdnp, fcnp->cn_nameptr, fcnp->cn_namelen);
3273	nfs_dulookup_init(&tdul, tdnp, tcnp->cn_nameptr, tcnp->cn_namelen);
3274
3275	/* Check for cross-device rename */
3276	fmp = vnode_mount(fvp);
3277	tmp = tvp ? vnode_mount(tvp) : NULL;
3278	tdmp = vnode_mount(tdvp);
3279	if ((fmp != tdmp) || (tvp && (fmp != tmp))) {
3280		error = EXDEV;
3281		goto out;
3282	}
3283
3284	/* XXX prevent renaming from/over a sillyrenamed file? */
3285
3286	/*
3287	 * If the tvp exists and is in use, sillyrename it before doing the
3288	 * rename of the new file over it.
3289	 * XXX Can't sillyrename a directory.
3290	 * Don't sillyrename if source and target are same vnode (hard
3291	 * links or case-variants)
3292	 */
3293	if (tvp && (tvp != fvp))
3294		inuse = vnode_isinuse(tvp, 0);
3295	if (inuse && !tnp->n_sillyrename && (vnode_vtype(tvp) != VDIR)) {
3296		error = nfs_sillyrename(tdnp, tnp, tcnp, ctx);
3297		if (error) {
3298			/* sillyrename failed. Instead of pressing on, return error */
3299			goto out; /* should not be ENOENT. */
3300		} else {
3301			/* sillyrename succeeded.*/
3302			tvp = NULL;
3303		}
3304	}
3305
3306	nfs_dulookup_start(&fdul, fdnp, ctx);
3307	nfs_dulookup_start(&tdul, tdnp, ctx);
3308
3309	error = nmp->nm_funcs->nf_rename_rpc(fdnp, fcnp->cn_nameptr, fcnp->cn_namelen,
3310			tdnp, tcnp->cn_nameptr, tcnp->cn_namelen, ctx);
3311
3312	/*
3313	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
3314	 */
3315	if (error == ENOENT)
3316		error = 0;
3317
3318	if (tvp && (tvp != fvp) && !tnp->n_sillyrename) {
3319		tvprecycle = (!error && !vnode_isinuse(tvp, 0) &&
3320		    (nfs_getattrcache(tnp, &nvattr, 1) || (nvattr.nva_nlink == 1)));
3321		lck_mtx_lock(nfs_node_hash_mutex);
3322		if (tvprecycle && (tnp->n_hflag & NHHASHED)) {
3323			/*
3324			 * remove nfsnode from hash now so we can't accidentally find it
3325			 * again if another object gets created with the same filehandle
3326			 * before this vnode gets reclaimed
3327			 */
3328			LIST_REMOVE(tnp, n_hash);
3329			tnp->n_hflag &= ~NHHASHED;
3330			FSDBG(266, 0, tnp, tnp->n_flag, 0xb1eb1e);
3331		}
3332		lck_mtx_unlock(nfs_node_hash_mutex);
3333	}
3334
3335	/* purge the old name cache entries and enter the new one */
3336	cache_purge(fvp);
3337	if (tvp) {
3338		cache_purge(tvp);
3339		if (tvprecycle) {
3340			/* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
3341			/* clear all flags other than these */
3342			tnp->n_flag &= (NMODIFIED);
3343			vnode_recycle(tvp);
3344		}
3345	}
3346	if (!error) {
3347		if (tdnp->n_flag & NNEGNCENTRIES) {
3348			tdnp->n_flag &= ~NNEGNCENTRIES;
3349			cache_purge_negatives(tdvp);
3350		}
3351		cache_enter(tdvp, fvp, tcnp);
3352		if (tdvp != fdvp) {	/* update parent pointer */
3353			if (fnp->n_parent && !vnode_get(fnp->n_parent)) {
3354				/* remove ref from old parent */
3355				vnode_rele(fnp->n_parent);
3356				vnode_put(fnp->n_parent);
3357			}
3358			fnp->n_parent = tdvp;
3359			if (tdvp && !vnode_get(tdvp)) {
3360				/* add ref to new parent */
3361				vnode_ref(tdvp);
3362				vnode_put(tdvp);
3363			} else {
3364				fnp->n_parent = NULL;
3365			}
3366		}
3367	}
3368out:
3369	if (!nfs_getattr(fdnp, &nvattr, ctx, 1)) {
3370		if (NFS_CHANGED_NC(nfsvers, fdnp, &nvattr)) {
3371			fdnp->n_flag &= ~NNEGNCENTRIES;
3372			cache_purge(fdvp);
3373			NFS_CHANGED_UPDATE_NC(nfsvers, fdnp, &nvattr);
3374		}
3375	}
3376	if (!nfs_getattr(tdnp, &nvattr, ctx, 1)) {
3377		if (NFS_CHANGED_NC(nfsvers, tdnp, &nvattr)) {
3378			tdnp->n_flag &= ~NNEGNCENTRIES;
3379			cache_purge(tdvp);
3380			NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &nvattr);
3381		}
3382	}
3383	nfs_dulookup_finish(&fdul, fdnp, ctx);
3384	nfs_dulookup_finish(&tdul, tdnp, ctx);
3385	if (locked) {
3386		/* unlock node */
3387		lck_mtx_lock(nfs_node_hash_mutex);
3388		tnp->n_hflag &= ~NHLOCKED;
3389		if (tnp->n_hflag & NHLOCKWANT) {
3390			tnp->n_hflag &= ~NHLOCKWANT;
3391			wakeup(tnp);
3392		}
3393		lck_mtx_unlock(nfs_node_hash_mutex);
3394	}
3395	nfs_unlock4(fdnp, fnp, tdnp, tnp);
3396	return (error);
3397}
3398
3399/*
3400 * Do an NFS rename rpc. Called from nfs_vnop_rename() and nfs_sillyrename().
3401 */
3402int
3403nfs3_rename_rpc(
3404	nfsnode_t fdnp,
3405	char *fnameptr,
3406	int fnamelen,
3407	nfsnode_t tdnp,
3408	char *tnameptr,
3409	int tnamelen,
3410	vfs_context_t ctx)
3411{
3412	int error = 0, status, fwccpostattr = 0, twccpostattr = 0;
3413	struct timespec fpremtime = { 0, 0 }, tpremtime = { 0, 0 };
3414	struct nfsmount *nmp;
3415	int nfsvers;
3416	u_int64_t xid, txid;
3417	struct nfsm_chain nmreq, nmrep;
3418
3419	nmp = NFSTONMP(fdnp);
3420	if (!nmp)
3421		return (ENXIO);
3422	nfsvers = nmp->nm_vers;
3423	if ((nfsvers == NFS_VER2) &&
3424	    ((fnamelen > NFS_MAXNAMLEN) || (tnamelen > NFS_MAXNAMLEN)))
3425		return (ENAMETOOLONG);
3426
3427	nfsm_chain_null(&nmreq);
3428	nfsm_chain_null(&nmrep);
3429
3430	nfsm_chain_build_alloc_init(error, &nmreq,
3431		(NFSX_FH(nfsvers) + NFSX_UNSIGNED) * 2 +
3432		nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen));
3433	nfsm_chain_add_fh(error, &nmreq, nfsvers, fdnp->n_fhp, fdnp->n_fhsize);
3434	nfsm_chain_add_string(error, &nmreq, fnameptr, fnamelen);
3435	nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
3436	nfsm_chain_add_string(error, &nmreq, tnameptr, tnamelen);
3437	nfsm_chain_build_done(error, &nmreq);
3438	nfsmout_if(error);
3439
3440	error = nfs_request(fdnp, NULL, &nmreq, NFSPROC_RENAME, ctx, &nmrep, &xid, &status);
3441
3442	if (nfsvers == NFS_VER3) {
3443		txid = xid;
3444		nfsm_chain_get_wcc_data(error, &nmrep, fdnp, &fpremtime, &fwccpostattr, &xid);
3445		nfsm_chain_get_wcc_data(error, &nmrep, tdnp, &tpremtime, &twccpostattr, &txid);
3446	}
3447	if (!error)
3448		error = status;
3449nfsmout:
3450	nfsm_chain_cleanup(&nmreq);
3451	nfsm_chain_cleanup(&nmrep);
3452	fdnp->n_flag |= NMODIFIED;
3453	/* if directory hadn't changed, update namecache mtime */
3454	if (nfstimespeccmp(&fdnp->n_ncmtime, &fpremtime, ==))
3455		NFS_CHANGED_UPDATE_NC(nfsvers, fdnp, &fdnp->n_vattr);
3456	if (!fwccpostattr)
3457		NATTRINVALIDATE(fdnp);
3458	tdnp->n_flag |= NMODIFIED;
3459	/* if directory hadn't changed, update namecache mtime */
3460	if (nfstimespeccmp(&tdnp->n_ncmtime, &tpremtime, ==))
3461		NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr);
3462	if (!twccpostattr)
3463		NATTRINVALIDATE(tdnp);
3464	return (error);
3465}
3466
3467/*
3468 * NFS hard link create call
3469 */
3470static int
3471nfs3_vnop_link(
3472	struct vnop_link_args /* {
3473		struct vnodeop_desc *a_desc;
3474		vnode_t a_vp;
3475		vnode_t a_tdvp;
3476		struct componentname *a_cnp;
3477		vfs_context_t a_context;
3478	} */ *ap)
3479{
3480	vfs_context_t ctx = ap->a_context;
3481	vnode_t vp = ap->a_vp;
3482	vnode_t tdvp = ap->a_tdvp;
3483	struct componentname *cnp = ap->a_cnp;
3484	int error = 0, status, wccpostattr = 0, attrflag = 0;
3485	struct timespec premtime = { 0, 0 };
3486	struct nfsmount *nmp;
3487	nfsnode_t np = VTONFS(vp);
3488	nfsnode_t tdnp = VTONFS(tdvp);
3489	int nfsvers;
3490	u_int64_t xid, txid;
3491	struct nfsm_chain nmreq, nmrep;
3492
3493	if (vnode_mount(vp) != vnode_mount(tdvp))
3494		return (EXDEV);
3495
3496	nmp = VTONMP(vp);
3497	if (!nmp)
3498		return (ENXIO);
3499	nfsvers = nmp->nm_vers;
3500	if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
3501		return (ENAMETOOLONG);
3502
3503	/*
3504	 * Push all writes to the server, so that the attribute cache
3505	 * doesn't get "out of sync" with the server.
3506	 * XXX There should be a better way!
3507	 */
3508	nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR);
3509
3510	error = nfs_lock2(tdnp, np, NFS_NODE_LOCK_EXCLUSIVE);
3511	if (error)
3512		return (error);
3513
3514	nfsm_chain_null(&nmreq);
3515	nfsm_chain_null(&nmrep);
3516
3517	nfsm_chain_build_alloc_init(error, &nmreq,
3518		NFSX_FH(nfsvers)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
3519	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
3520	nfsm_chain_add_fh(error, &nmreq, nfsvers, tdnp->n_fhp, tdnp->n_fhsize);
3521	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
3522	nfsm_chain_build_done(error, &nmreq);
3523	nfsmout_if(error);
3524	error = nfs_request(np, NULL, &nmreq, NFSPROC_LINK, ctx,
3525			&nmrep, &xid, &status);
3526	if (nfsvers == NFS_VER3) {
3527		txid = xid;
3528		nfsm_chain_postop_attr_update_flag(error, &nmrep, np, attrflag, &xid);
3529		nfsm_chain_get_wcc_data(error, &nmrep, tdnp, &premtime, &wccpostattr, &txid);
3530	}
3531	if (!error)
3532		error = status;
3533nfsmout:
3534	nfsm_chain_cleanup(&nmreq);
3535	nfsm_chain_cleanup(&nmrep);
3536	tdnp->n_flag |= NMODIFIED;
3537	if (!attrflag)
3538		NATTRINVALIDATE(np);
3539	/* if directory hadn't changed, update namecache mtime */
3540	if (nfstimespeccmp(&tdnp->n_ncmtime, &premtime, ==))
3541		NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr);
3542	if (!wccpostattr)
3543		NATTRINVALIDATE(tdnp);
3544	if (!error && (tdnp->n_flag & NNEGNCENTRIES)) {
3545		tdnp->n_flag &= ~NNEGNCENTRIES;
3546		cache_purge_negatives(tdvp);
3547	}
3548	nfs_unlock2(tdnp, np);
3549	/*
3550	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
3551	 */
3552	if (error == EEXIST)
3553		error = 0;
3554	return (error);
3555}
3556
3557/*
3558 * NFS symbolic link create call
3559 */
3560static int
3561nfs3_vnop_symlink(
3562	struct vnop_symlink_args /* {
3563		struct vnodeop_desc *a_desc;
3564		vnode_t a_dvp;
3565		vnode_t *a_vpp;
3566		struct componentname *a_cnp;
3567		struct vnode_attr *a_vap;
3568		char *a_target;
3569		vfs_context_t a_context;
3570	} */ *ap)
3571{
3572	vfs_context_t ctx = ap->a_context;
3573	vnode_t dvp = ap->a_dvp;
3574	struct vnode_attr *vap = ap->a_vap;
3575	struct componentname *cnp = ap->a_cnp;
3576	struct nfs_vattr nvattr, dnvattr;
3577	fhandle_t fh;
3578	int slen, error = 0, lockerror = ENOENT, status, wccpostattr = 0;
3579	struct timespec premtime = { 0, 0 };
3580	vnode_t newvp = NULL;
3581	int nfsvers, gotuid, gotgid;
3582	u_int64_t xid, dxid;
3583	nfsnode_t np = NULL;
3584	nfsnode_t dnp = VTONFS(dvp);
3585	struct nfsmount *nmp;
3586	struct nfsm_chain nmreq, nmrep;
3587	struct nfsreq *req = NULL;
3588	struct nfs_dulookup dul;
3589
3590	nmp = VTONMP(dvp);
3591	if (!nmp)
3592		return (ENXIO);
3593	nfsvers = nmp->nm_vers;
3594
3595	slen = strlen(ap->a_target);
3596	if ((nfsvers == NFS_VER2) &&
3597	    ((cnp->cn_namelen > NFS_MAXNAMLEN) || (slen > NFS_MAXPATHLEN)))
3598		return (ENAMETOOLONG);
3599
3600	VATTR_SET_SUPPORTED(vap, va_mode);
3601	VATTR_SET_SUPPORTED(vap, va_uid);
3602	VATTR_SET_SUPPORTED(vap, va_gid);
3603	VATTR_SET_SUPPORTED(vap, va_data_size);
3604	VATTR_SET_SUPPORTED(vap, va_access_time);
3605	VATTR_SET_SUPPORTED(vap, va_modify_time);
3606	gotuid = VATTR_IS_ACTIVE(vap, va_uid);
3607	gotgid = VATTR_IS_ACTIVE(vap, va_gid);
3608
3609	nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen);
3610
3611	nfsm_chain_null(&nmreq);
3612	nfsm_chain_null(&nmrep);
3613
3614	nfsm_chain_build_alloc_init(error, &nmreq,
3615		NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
3616		nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(nfsvers));
3617	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
3618	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
3619	if (nfsvers == NFS_VER3)
3620		nfsm_chain_add_v3sattr(error, &nmreq, vap);
3621	nfsm_chain_add_string(error, &nmreq, ap->a_target, slen);
3622	if (nfsvers == NFS_VER2)
3623		nfsm_chain_add_v2sattr(error, &nmreq, vap, -1);
3624	nfsm_chain_build_done(error, &nmreq);
3625	nfsmout_if(error);
3626	if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE)))
3627		error = lockerror;
3628	nfsmout_if(error);
3629
3630	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_SYMLINK,
3631			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
3632	if (!error) {
3633		nfs_dulookup_start(&dul, dnp, ctx);
3634		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
3635	}
3636
3637	dxid = xid;
3638	if (!error && !status) {
3639		if (dnp->n_flag & NNEGNCENTRIES) {
3640			dnp->n_flag &= ~NNEGNCENTRIES;
3641			cache_purge_negatives(dvp);
3642		}
3643		if (nfsvers == NFS_VER3)
3644			error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
3645		else
3646			fh.fh_len = 0;
3647	}
3648	if (nfsvers == NFS_VER3)
3649		nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
3650	if (!error)
3651		error = status;
3652nfsmout:
3653	nfsm_chain_cleanup(&nmreq);
3654	nfsm_chain_cleanup(&nmrep);
3655
3656	if (!lockerror) {
3657		dnp->n_flag |= NMODIFIED;
3658		/* if directory hadn't changed, update namecache mtime */
3659		if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
3660			NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
3661		if (!wccpostattr)
3662			NATTRINVALIDATE(dnp);
3663		if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) {
3664			if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) {
3665				dnp->n_flag &= ~NNEGNCENTRIES;
3666				cache_purge(dvp);
3667				NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr);
3668			}
3669		}
3670	}
3671
3672	if (!error && fh.fh_len)
3673		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
3674	if (!error && np)
3675		newvp = NFSTOV(np);
3676
3677	nfs_dulookup_finish(&dul, dnp, ctx);
3678
3679	/*
3680	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
3681	 * if we can succeed in looking up the symlink.
3682	 */
3683	if ((error == EEXIST) || (!error && !newvp)) {
3684		if (newvp) {
3685			nfs_unlock(np);
3686			vnode_put(newvp);
3687			newvp = NULL;
3688		}
3689		error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
3690		if (!error) {
3691			newvp = NFSTOV(np);
3692			if (vnode_vtype(newvp) != VLNK)
3693				error = EEXIST;
3694		}
3695	}
3696	if (!lockerror)
3697		nfs_unlock(dnp);
3698	if (!error && (gotuid || gotgid) &&
3699	    (!newvp || nfs_getattrcache(np, &nvattr, 1) ||
3700	     (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
3701	     (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
3702		/* clear ID bits if server didn't use them (or we can't tell) */
3703		VATTR_CLEAR_SUPPORTED(vap, va_uid);
3704		VATTR_CLEAR_SUPPORTED(vap, va_gid);
3705	}
3706	if (error) {
3707		if (newvp) {
3708			nfs_unlock(np);
3709			vnode_put(newvp);
3710		}
3711	} else {
3712		nfs_unlock(np);
3713		*ap->a_vpp = newvp;
3714	}
3715	return (error);
3716}
3717
3718/*
3719 * NFS make dir call
3720 */
3721static int
3722nfs3_vnop_mkdir(
3723	struct vnop_mkdir_args /* {
3724		struct vnodeop_desc *a_desc;
3725		vnode_t a_dvp;
3726		vnode_t *a_vpp;
3727		struct componentname *a_cnp;
3728		struct vnode_attr *a_vap;
3729		vfs_context_t a_context;
3730	} */ *ap)
3731{
3732	vfs_context_t ctx = ap->a_context;
3733	vnode_t dvp = ap->a_dvp;
3734	struct vnode_attr *vap = ap->a_vap;
3735	struct componentname *cnp = ap->a_cnp;
3736	struct nfs_vattr nvattr, dnvattr;
3737	nfsnode_t np = NULL;
3738	struct nfsmount *nmp;
3739	nfsnode_t dnp = VTONFS(dvp);
3740	vnode_t newvp = NULL;
3741	int error = 0, lockerror = ENOENT, status, wccpostattr = 0;
3742	struct timespec premtime = { 0, 0 };
3743	int nfsvers, gotuid, gotgid;
3744	u_int64_t xid, dxid;
3745	fhandle_t fh;
3746	struct nfsm_chain nmreq, nmrep;
3747	struct nfsreq *req = NULL;
3748	struct nfs_dulookup dul;
3749
3750	nmp = VTONMP(dvp);
3751	if (!nmp)
3752		return (ENXIO);
3753	nfsvers = nmp->nm_vers;
3754	if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
3755		return (ENAMETOOLONG);
3756
3757	VATTR_SET_SUPPORTED(vap, va_mode);
3758	VATTR_SET_SUPPORTED(vap, va_uid);
3759	VATTR_SET_SUPPORTED(vap, va_gid);
3760	VATTR_SET_SUPPORTED(vap, va_data_size);
3761	VATTR_SET_SUPPORTED(vap, va_access_time);
3762	VATTR_SET_SUPPORTED(vap, va_modify_time);
3763	gotuid = VATTR_IS_ACTIVE(vap, va_uid);
3764	gotgid = VATTR_IS_ACTIVE(vap, va_gid);
3765
3766	nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen);
3767
3768	nfsm_chain_null(&nmreq);
3769	nfsm_chain_null(&nmrep);
3770
3771	nfsm_chain_build_alloc_init(error, &nmreq,
3772		NFSX_FH(nfsvers) + NFSX_UNSIGNED +
3773		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
3774	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
3775	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
3776	if (nfsvers == NFS_VER3)
3777		nfsm_chain_add_v3sattr(error, &nmreq, vap);
3778	else
3779		nfsm_chain_add_v2sattr(error, &nmreq, vap, -1);
3780	nfsm_chain_build_done(error, &nmreq);
3781	nfsmout_if(error);
3782	if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE)))
3783		error = lockerror;
3784	nfsmout_if(error);
3785
3786	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKDIR,
3787			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
3788	if (!error) {
3789		nfs_dulookup_start(&dul, dnp, ctx);
3790		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
3791	}
3792
3793	dxid = xid;
3794	if (!error && !status) {
3795		if (dnp->n_flag & NNEGNCENTRIES) {
3796			dnp->n_flag &= ~NNEGNCENTRIES;
3797			cache_purge_negatives(dvp);
3798		}
3799		error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
3800	}
3801	if (nfsvers == NFS_VER3)
3802		nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
3803	if (!error)
3804		error = status;
3805nfsmout:
3806	nfsm_chain_cleanup(&nmreq);
3807	nfsm_chain_cleanup(&nmrep);
3808
3809	if (!lockerror) {
3810		dnp->n_flag |= NMODIFIED;
3811		/* if directory hadn't changed, update namecache mtime */
3812		if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
3813			NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
3814		if (!wccpostattr)
3815			NATTRINVALIDATE(dnp);
3816		if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) {
3817			if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) {
3818				dnp->n_flag &= ~NNEGNCENTRIES;
3819				cache_purge(dvp);
3820				NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr);
3821			}
3822		}
3823	}
3824
3825	if (!error && fh.fh_len)
3826		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
3827	if (!error && np)
3828		newvp = NFSTOV(np);
3829
3830	nfs_dulookup_finish(&dul, dnp, ctx);
3831
3832	/*
3833	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
3834	 * if we can succeed in looking up the directory.
3835	 */
3836	if (error == EEXIST || (!error && !newvp)) {
3837		if (newvp) {
3838			nfs_unlock(np);
3839			vnode_put(newvp);
3840			newvp = NULL;
3841		}
3842		error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
3843		if (!error) {
3844			newvp = NFSTOV(np);
3845			if (vnode_vtype(newvp) != VDIR)
3846				error = EEXIST;
3847		}
3848	}
3849	if (!lockerror)
3850		nfs_unlock(dnp);
3851	if (!error && (gotuid || gotgid) &&
3852	    (!newvp || nfs_getattrcache(np, &nvattr, 1) ||
3853	     (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
3854	     (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
3855		/* clear ID bits if server didn't use them (or we can't tell) */
3856		VATTR_CLEAR_SUPPORTED(vap, va_uid);
3857		VATTR_CLEAR_SUPPORTED(vap, va_gid);
3858	}
3859	if (error) {
3860		if (newvp) {
3861			nfs_unlock(np);
3862			vnode_put(newvp);
3863		}
3864	} else {
3865		nfs_unlock(np);
3866		*ap->a_vpp = newvp;
3867	}
3868	return (error);
3869}
3870
3871/*
3872 * NFS remove directory call
3873 */
3874static int
3875nfs3_vnop_rmdir(
3876	struct vnop_rmdir_args /* {
3877		struct vnodeop_desc *a_desc;
3878		vnode_t a_dvp;
3879		vnode_t a_vp;
3880		struct componentname *a_cnp;
3881		vfs_context_t a_context;
3882	} */ *ap)
3883{
3884	vfs_context_t ctx = ap->a_context;
3885	vnode_t vp = ap->a_vp;
3886	vnode_t dvp = ap->a_dvp;
3887	struct componentname *cnp = ap->a_cnp;
3888	int error = 0, status, wccpostattr = 0;
3889	struct timespec premtime = { 0, 0 };
3890	struct nfsmount *nmp;
3891	nfsnode_t np = VTONFS(vp);
3892	nfsnode_t dnp = VTONFS(dvp);
3893	struct nfs_vattr dnvattr;
3894	int nfsvers;
3895	u_int64_t xid;
3896	struct nfsm_chain nmreq, nmrep;
3897	struct nfsreq *req = NULL;
3898	struct nfs_dulookup dul;
3899
3900	nmp = VTONMP(vp);
3901	if (!nmp)
3902		return (ENXIO);
3903	nfsvers = nmp->nm_vers;
3904	if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
3905		return (ENAMETOOLONG);
3906
3907	nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen);
3908
3909	if ((error = nfs_lock2(dnp, np, NFS_NODE_LOCK_EXCLUSIVE)))
3910		return (error);
3911
3912	nfsm_chain_null(&nmreq);
3913	nfsm_chain_null(&nmrep);
3914
3915	nfsm_chain_build_alloc_init(error, &nmreq,
3916		NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
3917	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
3918	nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
3919	nfsm_chain_build_done(error, &nmreq);
3920	nfsmout_if(error);
3921
3922	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_RMDIR,
3923			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
3924	if (!error) {
3925		nfs_dulookup_start(&dul, dnp, ctx);
3926		error = nfs_request_async_finish(req, &nmrep, &xid, &status);
3927	}
3928
3929	if (nfsvers == NFS_VER3)
3930		nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid);
3931	if (!error)
3932		error = status;
3933nfsmout:
3934	nfsm_chain_cleanup(&nmreq);
3935	nfsm_chain_cleanup(&nmrep);
3936
3937	dnp->n_flag |= NMODIFIED;
3938	/* if directory hadn't changed, update namecache mtime */
3939	if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
3940		NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
3941	if (!wccpostattr)
3942		NATTRINVALIDATE(dnp);
3943	cache_purge(vp);
3944	if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) {
3945		if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) {
3946			dnp->n_flag &= ~NNEGNCENTRIES;
3947			cache_purge(dvp);
3948			NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr);
3949		}
3950	}
3951	nfs_dulookup_finish(&dul, dnp, ctx);
3952	nfs_unlock2(dnp, np);
3953
3954	/*
3955	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
3956	 */
3957	if (error == ENOENT)
3958		error = 0;
3959	if (!error) {
3960		/*
3961		 * remove nfsnode from hash now so we can't accidentally find it
3962		 * again if another object gets created with the same filehandle
3963		 * before this vnode gets reclaimed
3964		 */
3965		lck_mtx_lock(nfs_node_hash_mutex);
3966		if (np->n_hflag & NHHASHED) {
3967			LIST_REMOVE(np, n_hash);
3968			np->n_hflag &= ~NHHASHED;
3969			FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
3970		}
3971		lck_mtx_unlock(nfs_node_hash_mutex);
3972	}
3973	return (error);
3974}
3975
3976/*
3977 * NFS readdir call
3978 */
3979static int
3980nfs_vnop_readdir(
3981	struct vnop_readdir_args /* {
3982		struct vnodeop_desc *a_desc;
3983		vnode_t a_vp;
3984		struct uio *a_uio;
3985		int *a_eofflag;
3986		int *a_ncookies;
3987		u_long **a_cookies;
3988		vfs_context_t a_context;
3989	} */ *ap)
3990{
3991	vfs_context_t ctx = ap->a_context;
3992	vnode_t vp = ap->a_vp;
3993	nfsnode_t np = VTONFS(vp);
3994	struct nfsmount *nmp;
3995	struct uio *uio = ap->a_uio;
3996	int tresid, error, nfsvers;
3997	struct nfs_vattr nvattr;
3998
3999	if (vnode_vtype(vp) != VDIR)
4000		return (EPERM);
4001
4002	nmp = VTONMP(vp);
4003	if (!nmp)
4004		return (ENXIO);
4005	nfsvers = nmp->nm_vers;
4006
4007	if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
4008		return (error);
4009
4010	/*
4011	 * First, check for hit on the EOF offset cache
4012	 */
4013	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
4014	    (np->n_flag & NMODIFIED) == 0) {
4015		if (!nfs_getattr(np, &nvattr, ctx, 1)) {
4016			if (!NFS_CHANGED(nfsvers, np, &nvattr)) {
4017				nfs_unlock(np);
4018				OSAddAtomic(1, (SInt32*)&nfsstats.direofcache_hits);
4019				if (ap->a_eofflag)
4020					*ap->a_eofflag = 1;
4021				return (0);
4022			}
4023			if (NFS_CHANGED_NC(nfsvers, np, &nvattr)) {
4024				/* directory changed, purge any name cache entries */
4025				np->n_flag &= ~NNEGNCENTRIES;
4026				cache_purge(vp);
4027			}
4028		}
4029	}
4030	nfs_unlock(np);
4031	if (ap->a_eofflag)
4032		*ap->a_eofflag = 0;
4033
4034	/*
4035	 * Call nfs_bioread() to do the real work.
4036	 */
4037	// LP64todo - fix this
4038	tresid = uio_uio_resid(uio);
4039	error = nfs_bioread(np, uio, 0, ap->a_eofflag, ctx);
4040
4041	if (!error && uio_uio_resid(uio) == tresid)
4042		OSAddAtomic(1, (SInt32*)&nfsstats.direofcache_misses);
4043	return (error);
4044}
4045
/*
 * Readdir RPC call.
 * Called from below the buffer cache by nfs_buf_readdir().
 *
 * Issues READDIR requests for directory dnp, starting from the cookie
 * recorded for uiop->uio_offset, and copies the returned entries into
 * uiop as struct dirent records.  Records are packed so that none
 * straddles a DIRBLKSIZ boundary; the last record of each block has its
 * d_reclen extended to reach the boundary.
 *
 * The loop stops at end of directory or when the next entry no longer
 * fits in uiop.  At end of directory the final offset is saved in
 * dnp->n_direofoffset; otherwise the cookie to resume from is saved for
 * the final offset.
 *
 * Returns 0 on success, ENXIO if the mount is gone, NFSERR_BAD_COOKIE if
 * no cookie is known for the starting offset, EBADRPC for an entry with a
 * non-positive name length, or the lock/RPC/server status error.
 */
/* size of struct dirent minus its (MAXNAMLEN + 1) byte name area */
#define	DIRHDSIZ	((int)(sizeof(struct dirent) - (MAXNAMLEN + 1)))
int
nfs3_readdir_rpc(nfsnode_t dnp, struct uio *uiop, vfs_context_t ctx)
{
	int len, skiplen, left;
	struct dirent *dp = NULL;
	nfsuint64 *cookiep;
	nfsuint64 cookie;
	struct nfsmount *nmp;
	u_quad_t fileno;
	int error = 0, lockerror, status, tlen, more_dirs = 1, blksiz = 0, bigenough = 1, eof;
	int nfsvers, nmreaddirsize;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;
	char *cp;

#if DIAGNOSTIC
	/* XXX limitation based on need to adjust uio */
	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
		(uio_uio_resid(uiop) & (DIRBLKSIZ - 1)))
		panic("nfs_readdirrpc: bad uio");
#endif
	nmp = NFSTONMP(dnp);
	if (!nmp)
		return (ENXIO);
	nfsvers = nmp->nm_vers;
	nmreaddirsize = nmp->nm_readdirsize;

	/* lockerror is zero exactly while this function holds dnp's lock */
	if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED)))
		return (lockerror);

	/*
	 * If there is no cookie, assume directory was stale.
	 */
	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
	if (cookiep)
		cookie = *cookiep;
	else {
		nfs_unlock(dnp);
		return (NFSERR_BAD_COOKIE);
	}

	/*
	 * Loop around doing readdir rpc's of size nm_readdirsize
	 * truncated to a multiple of DIRBLKSIZ.
	 * The stopping criteria is EOF or buffer full.
	 */
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);
	while (more_dirs && bigenough) {
		/* build the request while dnp is still locked (shared) */
		nfsm_chain_build_alloc_init(error, &nmreq,
			NFSX_FH(nfsvers) + NFSX_READDIR(nfsvers));
		nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
		if (nfsvers == NFS_VER3) {
			/* opaque values don't need swapping, but as long */
			/* as we are consistent about it, it should be ok */
			nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[0]);
			nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[1]);
			nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[0]);
			nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[1]);
		} else {
			/* v2 sends only one 32-bit cookie word and no verifier */
			nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[0]);
		}
		nfsm_chain_add_32(error, &nmreq, nmreaddirsize);
		nfsm_chain_build_done(error, &nmreq);
		/* drop the node lock for the duration of the RPC */
		nfs_unlock(dnp);
		lockerror = ENOENT;
		nfsmout_if(error);

		error = nfs_request(dnp, NULL, &nmreq, NFSPROC_READDIR, ctx,
				&nmrep, &xid, &status);

		/* retake the lock exclusively to update attributes and the cookie verifier */
		if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE)))
			error = lockerror;

		if (nfsvers == NFS_VER3)
			nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
		if (!error)
			error = status;
		if (nfsvers == NFS_VER3) {
			nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[0]);
			nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[1]);
		}
		nfsm_chain_get_32(error, &nmrep, more_dirs);

		/* the entries themselves are parsed with dnp unlocked */
		if (!lockerror) {
			nfs_unlock(dnp);
			lockerror = ENOENT;
		}
		nfsmout_if(error);

		/* loop thru the dir entries, doctoring them to 4bsd form */
		while (more_dirs && bigenough) {
			if (nfsvers == NFS_VER3)
				nfsm_chain_get_64(error, &nmrep, fileno);
			else
				nfsm_chain_get_32(error, &nmrep, fileno);
			nfsm_chain_get_32(error, &nmrep, len);
			nfsmout_if(error);
			/* Note: v3 supports longer names, but struct dirent doesn't */
			/* so we just truncate the names to fit */
			if (len <= 0) {
				error = EBADRPC;
				goto nfsmout;
			}
			if (len > MAXNAMLEN) {
				/* skiplen = name bytes that will be discarded */
				skiplen = len - MAXNAMLEN;
				len = MAXNAMLEN;
			} else {
				skiplen = 0;
			}
			/* tlen = space reserved for the name plus padding */
			tlen = nfsm_rndup(len);
			if (tlen == len)
				tlen += 4;	/* To ensure null termination */
			left = DIRBLKSIZ - blksiz;
			if ((tlen + DIRHDSIZ) > left) {
				/*
				 * Record won't fit in what is left of this block:
				 * stretch the previous record to the block boundary
				 * and start a new block.
				 */
				dp->d_reclen += left;
				uio_iov_base_add(uiop, left);
				uio_iov_len_add(uiop, -left);
				uiop->uio_offset += left;
				uio_uio_resid_add(uiop, -left);
				blksiz = 0;
			}
			if ((tlen + DIRHDSIZ) > uio_uio_resid(uiop))
				bigenough = 0;
			if (bigenough) {
				// LP64todo - fix this!
				/* fill in the record header at the current uio position */
				dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
				dp->d_fileno = (int)fileno;
				dp->d_namlen = len;
				dp->d_reclen = tlen + DIRHDSIZ;
				dp->d_type = DT_UNKNOWN;
				blksiz += dp->d_reclen;
				if (blksiz == DIRBLKSIZ)
					blksiz = 0;
				/* step the uio past the header */
				uiop->uio_offset += DIRHDSIZ;
#if LP64KERN
				uio_uio_resid_add(uiop, -((int64_t)DIRHDSIZ));
				uio_iov_len_add(uiop, -((int64_t)DIRHDSIZ));
#else
				uio_uio_resid_add(uiop, -((int)DIRHDSIZ));
				uio_iov_len_add(uiop, -((int)DIRHDSIZ));
#endif
				uio_iov_base_add(uiop, DIRHDSIZ);
				/* copy the (possibly truncated) name into the uio */
				error = nfsm_chain_get_uio(&nmrep, len, uiop);
				nfsmout_if(error);
				// LP64todo - fix this!
				cp = CAST_DOWN(caddr_t, uio_iov_base(uiop));
				/* tlen now holds just the padding that follows the name */
				tlen -= len;
				*cp = '\0';	/* null terminate */
				uio_iov_base_add(uiop, tlen);
				uio_iov_len_add(uiop, -tlen);
				uiop->uio_offset += tlen;
				uio_uio_resid_add(uiop, -tlen);
				/* skip the part of the name in the reply that was cut off */
				if (skiplen)
					nfsm_chain_adv(error, &nmrep,
						nfsm_rndup(len + skiplen) - nfsm_rndup(len));
			} else {
				/* no room: skip the whole name in the reply */
				nfsm_chain_adv(error, &nmrep, nfsm_rndup(len + skiplen));
			}
			if (bigenough) {
				/* remember the cookie of the last entry copied out */
				nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[0]);
				if (nfsvers == NFS_VER3)
					nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[1]);
			} else if (nfsvers == NFS_VER3)
				nfsm_chain_adv(error, &nmrep, 2 * NFSX_UNSIGNED);
			else
				nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
			nfsm_chain_get_32(error, &nmrep, more_dirs);
			nfsmout_if(error);
		}
		/*
		 * If at end of rpc data, get the eof boolean
		 */
		if (!more_dirs) {
			nfsm_chain_get_32(error, &nmrep, eof);
			if (!error)
				more_dirs = (eof == 0);
		}
		/* retake the shared lock for the next pass of request building */
		if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED)))
			error = lockerror;
		nfsmout_if(error);
		nfsm_chain_cleanup(&nmrep);
		nfsm_chain_null(&nmreq);
	}
	if (!lockerror) {
		nfs_unlock(dnp);
		lockerror = ENOENT;
	}
	/*
	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
	 * by increasing d_reclen for the last record.
	 */
	if (blksiz > 0) {
		left = DIRBLKSIZ - blksiz;
		dp->d_reclen += left;
		uio_iov_base_add(uiop, left);
		uio_iov_len_add(uiop, -left);
		uiop->uio_offset += left;
		uio_uio_resid_add(uiop, -left);
	}

	/* exclusive lock for updating the node's EOF offset / cookie state */
	if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE)))
		error = lockerror;
	nfsmout_if(error);

	/*
	 * We are now either at the end of the directory or have filled the
	 * block.
	 */
	if (bigenough)
		dnp->n_direofoffset = uiop->uio_offset;
	else {
		if (uio_uio_resid(uiop) > 0)
			printf("EEK! readdirrpc resid > 0\n");
		/* save the resume cookie for the offset we stopped at */
		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
		if (cookiep)
			*cookiep = cookie;
	}

nfsmout:
	if (!lockerror)
		nfs_unlock(dnp);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
4277
4278/*
4279 * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
4280 */
4281int
4282nfs3_readdirplus_rpc(nfsnode_t dnp, struct uio *uiop, vfs_context_t ctx)
4283{
4284	size_t len, tlen, skiplen, left;
4285	struct dirent *dp = NULL;
4286	vnode_t newvp;
4287	nfsuint64 *cookiep;
4288	struct componentname cn, *cnp = &cn;
4289	nfsuint64 cookie;
4290	struct nfsmount *nmp;
4291	nfsnode_t np;
4292	u_char *fhp;
4293	u_quad_t fileno;
4294	int error = 0, lockerror, status, more_dirs = 1, blksiz = 0, doit, bigenough = 1;
4295	int nfsvers, nmreaddirsize, nmrsize, attrflag, eof;
4296	size_t fhsize;
4297	u_int64_t xid, savexid;
4298	struct nfs_vattr nvattr;
4299	struct nfsm_chain nmreq, nmrep;
4300	char *cp;
4301
4302#if DIAGNOSTIC
4303	/* XXX limitation based on need to adjust uio */
4304	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
4305		(uio_uio_resid(uiop) & (DIRBLKSIZ - 1)))
4306		panic("nfs3_readdirplus_rpc: bad uio");
4307#endif
4308	nmp = NFSTONMP(dnp);
4309	if (!nmp)
4310		return (ENXIO);
4311	nfsvers = nmp->nm_vers;
4312	nmreaddirsize = nmp->nm_readdirsize;
4313	nmrsize = nmp->nm_rsize;
4314
4315	bzero(cnp, sizeof(*cnp));
4316	newvp = NULLVP;
4317
4318	if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED)))
4319		return (lockerror);
4320
4321	/*
4322	 * If there is no cookie, assume directory was stale.
4323	 */
4324	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
4325	if (cookiep)
4326		cookie = *cookiep;
4327	else {
4328		nfs_unlock(dnp);
4329		return (NFSERR_BAD_COOKIE);
4330	}
4331
4332	/*
4333	 * Loop around doing readdir rpc's of size nm_readdirsize
4334	 * truncated to a multiple of DIRBLKSIZ.
4335	 * The stopping criteria is EOF or buffer full.
4336	 */
4337	nfsm_chain_null(&nmreq);
4338	nfsm_chain_null(&nmrep);
4339	while (more_dirs && bigenough) {
4340		nfsm_chain_build_alloc_init(error, &nmreq,
4341			NFSX_FH(NFS_VER3) + 6 * NFSX_UNSIGNED);
4342		nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4343		/* opaque values don't need swapping, but as long */
4344		/* as we are consistent about it, it should be ok */
4345		nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[0]);
4346		nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[1]);
4347		nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[0]);
4348		nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[1]);
4349		nfsm_chain_add_32(error, &nmreq, nmreaddirsize);
4350		nfsm_chain_add_32(error, &nmreq, nmrsize);
4351		nfsm_chain_build_done(error, &nmreq);
4352		nfs_unlock(dnp);
4353		lockerror = ENOENT;
4354		nfsmout_if(error);
4355
4356		error = nfs_request(dnp, NULL, &nmreq, NFSPROC_READDIRPLUS, ctx,
4357				&nmrep, &xid, &status);
4358
4359		if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE)))
4360			error = lockerror;
4361
4362		savexid = xid;
4363		nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
4364		if (!error)
4365			error = status;
4366		nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[0]);
4367		nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[1]);
4368		nfsm_chain_get_32(error, &nmrep, more_dirs);
4369
4370		if (!lockerror) {
4371			nfs_unlock(dnp);
4372			lockerror = ENOENT;
4373		}
4374		nfsmout_if(error);
4375		nfsmout_if(error);
4376
4377		/* loop thru the dir entries, doctoring them to 4bsd form */
4378		while (more_dirs && bigenough) {
4379			nfsm_chain_get_64(error, &nmrep, fileno);
4380			nfsm_chain_get_32(error, &nmrep, len);
4381			nfsmout_if(error);
4382			/* Note: v3 supports longer names, but struct dirent doesn't */
4383			/* so we just truncate the names to fit */
4384			if (len <= 0) {
4385				error = EBADRPC;
4386				goto nfsmout;
4387			}
4388			if (len > MAXNAMLEN) {
4389				skiplen = len - MAXNAMLEN;
4390				len = MAXNAMLEN;
4391			} else {
4392				skiplen = 0;
4393			}
4394			tlen = nfsm_rndup(len);
4395			if (tlen == len)
4396				tlen += 4;	/* To ensure null termination */
4397			left = DIRBLKSIZ - blksiz;
4398			if ((tlen + DIRHDSIZ) > left) {
4399				dp->d_reclen += left;
4400				uio_iov_base_add(uiop, left);
4401				uio_iov_len_add(uiop, -left);
4402				uiop->uio_offset += left;
4403				uio_uio_resid_add(uiop, -left);
4404				blksiz = 0;
4405			}
4406			if ((tlen + DIRHDSIZ) > uio_uio_resid(uiop))
4407				bigenough = 0;
4408			if (bigenough) {
4409				// LP64todo - fix this!
4410				dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop));
4411				dp->d_fileno = (int)fileno;
4412				dp->d_namlen = len;
4413				dp->d_reclen = tlen + DIRHDSIZ;
4414				dp->d_type = DT_UNKNOWN;
4415				blksiz += dp->d_reclen;
4416				if (blksiz == DIRBLKSIZ)
4417					blksiz = 0;
4418				uiop->uio_offset += DIRHDSIZ;
4419#if LP64KERN
4420				uio_uio_resid_add(uiop, -((int64_t)DIRHDSIZ));
4421				uio_iov_len_add(uiop, -((int64_t)DIRHDSIZ));
4422#else
4423				uio_uio_resid_add(uiop, -((int)DIRHDSIZ));
4424				uio_iov_len_add(uiop, -((int)DIRHDSIZ));
4425#endif
4426				uio_iov_base_add(uiop, DIRHDSIZ);
4427				// LP64todo - fix this!
4428				cnp->cn_nameptr = CAST_DOWN(caddr_t, uio_iov_base(uiop));
4429				cnp->cn_namelen = len;
4430				error = nfsm_chain_get_uio(&nmrep, len, uiop);
4431				nfsmout_if(error);
4432				cp = CAST_DOWN(caddr_t, uio_iov_base(uiop));
4433				tlen -= len;
4434				*cp = '\0';
4435				uio_iov_base_add(uiop, tlen);
4436				uio_iov_len_add(uiop, -tlen);
4437				uiop->uio_offset += tlen;
4438				uio_uio_resid_add(uiop, -tlen);
4439				if (skiplen)
4440					nfsm_chain_adv(error, &nmrep,
4441						nfsm_rndup(len + skiplen) - nfsm_rndup(len));
4442			} else {
4443				nfsm_chain_adv(error, &nmrep, nfsm_rndup(len + skiplen));
4444			}
4445			if (bigenough) {
4446				nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[0]);
4447				nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[1]);
4448			} else
4449				nfsm_chain_adv(error, &nmrep, 2 * NFSX_UNSIGNED);
4450
4451			nfsm_chain_get_32(error, &nmrep, attrflag);
4452			nfsmout_if(error);
4453			if (attrflag) {
4454			    /* grab attributes */
4455			    error = nfs_parsefattr(&nmrep, NFS_VER3, &nvattr);
4456			    nfsmout_if(error);
4457			    dp->d_type = IFTODT(VTTOIF(nvattr.nva_type));
4458			    /* check for file handle */
4459			    nfsm_chain_get_32(error, &nmrep, doit);
4460			    nfsmout_if(error);
4461			    if (doit) {
4462				nfsm_chain_get_fh_ptr(error, &nmrep, NFS_VER3, fhp, fhsize);
4463				nfsmout_if(error);
4464				if (NFS_CMPFH(dnp, fhp, fhsize)) {
4465				    error = vnode_ref(NFSTOV(dnp));
4466				    if (error) {
4467					doit = 0;
4468				    } else {
4469					if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE)))
4470					    error = lockerror;
4471					if (error) {
4472					    vnode_rele(NFSTOV(dnp));
4473					    goto nfsmout;
4474					}
4475					newvp = NFSTOV(dnp);
4476					np = dnp;
4477				    }
4478				} else if (!bigenough ||
4479				        (cnp->cn_namelen == 2 &&
4480					 cnp->cn_nameptr[1] == '.' &&
4481					 cnp->cn_nameptr[0] == '.')) {
4482				    /*
4483				     * XXXmacko I don't think this ".." thing is a problem anymore.
4484				     * don't doit if we can't guarantee
4485				     * that this entry is NOT ".." because
4486				     * we would have to drop the lock on
4487				     * the directory before getting the
4488				     * lock on the ".." vnode... and we
4489				     * don't want to drop the dvp lock in
4490				     * the middle of a readdirplus.
4491				     */
4492				    doit = 0;
4493				} else {
4494				    cnp->cn_hash = 0;
4495
4496				    error = nfs_nget(NFSTOMP(dnp), dnp, cnp,
4497				    		fhp, fhsize, &nvattr, &xid, NG_MAKEENTRY, &np);
4498				    if (error)
4499					doit = 0;
4500				    else
4501					newvp = NFSTOV(np);
4502				}
4503			    }
4504			    /* update attributes if not already updated */
4505			    if (doit && bigenough && (np->n_xid <= savexid)) {
4506				xid = savexid;
4507				nfs_loadattrcache(np, &nvattr, &xid, 0);
4508				/* any error can be ignored */
4509			    }
4510			} else {
4511			    /* Just skip over the file handle */
4512			    nfsm_chain_get_32(error, &nmrep, fhsize);
4513			    nfsm_chain_adv(error, &nmrep, nfsm_rndup(fhsize));
4514			}
4515			if (newvp != NULLVP) {
4516			    nfs_unlock(np);
4517			    if (newvp == NFSTOV(dnp))
4518				vnode_rele(newvp);
4519			    else
4520				vnode_put(newvp);
4521			    newvp = NULLVP;
4522			}
4523			nfsm_chain_get_32(error, &nmrep, more_dirs);
4524			nfsmout_if(error);
4525		}
4526		/*
4527		 * If at end of rpc data, get the eof boolean
4528		 */
4529		if (!more_dirs) {
4530			nfsm_chain_get_32(error, &nmrep, eof);
4531			if (!error)
4532				more_dirs = (eof == 0);
4533		}
4534		if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED)))
4535			error = lockerror;
4536		nfsmout_if(error);
4537		nfsm_chain_cleanup(&nmrep);
4538		nfsm_chain_null(&nmreq);
4539	}
4540	if (!lockerror) {
4541		nfs_unlock(dnp);
4542		lockerror = ENOENT;
4543	}
4544	/*
4545	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
4546	 * by increasing d_reclen for the last record.
4547	 */
4548	if (blksiz > 0) {
4549		left = DIRBLKSIZ - blksiz;
4550		dp->d_reclen += left;
4551		uio_iov_base_add(uiop, left);
4552		uio_iov_len_add(uiop, -left);
4553		uiop->uio_offset += left;
4554		uio_uio_resid_add(uiop, -left);
4555	}
4556
4557	if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE)))
4558		error = lockerror;
4559	nfsmout_if(error);
4560
4561	/*
4562	 * We are now either at the end of the directory or have filled the
4563	 * block.
4564	 */
4565	if (bigenough)
4566		dnp->n_direofoffset = uiop->uio_offset;
4567	else {
4568		if (uio_uio_resid(uiop) > 0)
4569			printf("EEK! readdirplus_rpc resid > 0\n");
4570		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
4571		if (cookiep)
4572			*cookiep = cookie;
4573	}
4574
4575nfsmout:
4576	if (!lockerror)
4577		nfs_unlock(dnp);
4578	nfsm_chain_cleanup(&nmreq);
4579	nfsm_chain_cleanup(&nmrep);
4580	return (error);
4581}
4582
4583/*
4584 * Silly rename. To make the NFS filesystem that is stateless look a little
4585 * more like the "ufs" a remove of an active vnode is translated to a rename
4586 * to a funny looking filename that is removed by nfs_vnop_inactive on the
4587 * nfsnode. There is the potential for another process on a different client
4588 * to create the same funny name between when the lookitup() fails and the
4589 * rename() completes, but...
4590 */
4591
4592/* format of "random" silly names - includes a number and pid */
4593/* (note: shouldn't exceed size of nfs_sillyrename.nsr_name) */
4594#define NFS_SILLYNAME_FORMAT ".nfs.%08x.%04x"
4595/* starting from zero isn't silly enough */
4596static uint32_t nfs_sillyrename_number = 0x20051025;
4597
4598static int
4599nfs_sillyrename(
4600	nfsnode_t dnp,
4601	nfsnode_t np,
4602	struct componentname *cnp,
4603	vfs_context_t ctx)
4604{
4605	struct nfs_sillyrename *nsp;
4606	int error;
4607	short pid;
4608	kauth_cred_t cred;
4609	uint32_t num;
4610	struct nfsmount *nmp;
4611
4612	nmp = NFSTONMP(dnp);
4613	if (!nmp)
4614		return (ENXIO);
4615
4616	cache_purge(NFSTOV(np));
4617
4618	MALLOC_ZONE(nsp, struct nfs_sillyrename *,
4619			sizeof (struct nfs_sillyrename), M_NFSREQ, M_WAITOK);
4620	if (!nsp)
4621		return (ENOMEM);
4622	cred = vfs_context_ucred(ctx);
4623	kauth_cred_ref(cred);
4624	nsp->nsr_cred = cred;
4625	nsp->nsr_dnp = dnp;
4626	error = vnode_ref(NFSTOV(dnp));
4627	if (error)
4628		goto bad_norele;
4629
4630	/* Fudge together a funny name */
4631	pid = vfs_context_pid(ctx);
4632	num = OSAddAtomic(1, (SInt32*)&nfs_sillyrename_number);
4633	nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name),
4634				NFS_SILLYNAME_FORMAT, num, (pid & 0xffff));
4635	if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name))
4636		nsp->nsr_namlen = sizeof(nsp->nsr_name) - 1;
4637
4638	/* Try lookitups until we get one that isn't there */
4639	while (nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, NULL) == 0) {
4640		num = OSAddAtomic(1, (SInt32*)&nfs_sillyrename_number);
4641		nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name),
4642					NFS_SILLYNAME_FORMAT, num, (pid & 0xffff));
4643		if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name))
4644			nsp->nsr_namlen = sizeof(nsp->nsr_name) - 1;
4645	}
4646
4647	/* now, do the rename */
4648	error = nmp->nm_funcs->nf_rename_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
4649					dnp, nsp->nsr_name, nsp->nsr_namlen, ctx);
4650	if (!error && (dnp->n_flag & NNEGNCENTRIES)) {
4651		dnp->n_flag &= ~NNEGNCENTRIES;
4652		cache_purge_negatives(NFSTOV(dnp));
4653	}
4654	FSDBG(267, dnp, np, num, error);
4655	if (error)
4656		goto bad;
4657	error = nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, &np);
4658	np->n_sillyrename = nsp;
4659	return (0);
4660bad:
4661	vnode_rele(NFSTOV(dnp));
4662bad_norele:
4663	nsp->nsr_cred = NOCRED;
4664	kauth_cred_unref(&cred);
4665	FREE_ZONE(nsp, sizeof(*nsp), M_NFSREQ);
4666	return (error);
4667}
4668
4669int
4670nfs3_lookup_rpc_async(
4671	nfsnode_t dnp,
4672	char *name,
4673	int namelen,
4674	vfs_context_t ctx,
4675	struct nfsreq **reqp)
4676{
4677	struct nfsmount *nmp;
4678	struct nfsm_chain nmreq;
4679	int error = 0, nfsvers;
4680
4681	nmp = NFSTONMP(dnp);
4682	if (!nmp)
4683		return (ENXIO);
4684	nfsvers = nmp->nm_vers;
4685
4686	nfsm_chain_null(&nmreq);
4687
4688	nfsm_chain_build_alloc_init(error, &nmreq,
4689		NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
4690	nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
4691	nfsm_chain_add_string(error, &nmreq, name, namelen);
4692	nfsm_chain_build_done(error, &nmreq);
4693	nfsmout_if(error);
4694	error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_LOOKUP,
4695			vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, reqp);
4696nfsmout:
4697	nfsm_chain_cleanup(&nmreq);
4698	return (error);
4699}
4700
4701int
4702nfs3_lookup_rpc_async_finish(
4703	nfsnode_t dnp,
4704	vfs_context_t ctx,
4705	struct nfsreq *req,
4706	u_int64_t *xidp,
4707	fhandle_t *fhp,
4708	struct nfs_vattr *nvap)
4709{
4710	int error = 0, status, nfsvers, attrflag;
4711	u_int64_t xid;
4712	struct nfsmount *nmp;
4713	struct nfsm_chain nmrep;
4714
4715	nmp = NFSTONMP(dnp);
4716	nfsvers = nmp->nm_vers;
4717
4718	nfsm_chain_null(&nmrep);
4719
4720	error = nfs_request_async_finish(req, &nmrep, xidp, &status);
4721
4722	xid = *xidp;
4723	if (error || status) {
4724		if (nfsvers == NFS_VER3)
4725			nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
4726		if (!error)
4727			error = status;
4728		goto nfsmout;
4729	}
4730
4731	nfsmout_if(error || !fhp || !nvap);
4732
4733	/* get the file handle */
4734	nfsm_chain_get_fh(error, &nmrep, nfsvers, fhp);
4735
4736	/* get the attributes */
4737	if (nfsvers == NFS_VER3) {
4738		nfsm_chain_postop_attr_get(error, &nmrep, attrflag, nvap);
4739		nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid);
4740		if (!error && !attrflag)
4741			error = nfs3_getattr_rpc(NULL, NFSTOMP(dnp), fhp->fh_data, fhp->fh_len, ctx, nvap, xidp);
4742	} else {
4743		error = nfs_parsefattr(&nmrep, nfsvers, nvap);
4744	}
4745nfsmout:
4746	nfsm_chain_cleanup(&nmrep);
4747	return (error);
4748}
4749
4750/*
4751 * Look up a file name and optionally either update the file handle or
4752 * allocate an nfsnode, depending on the value of npp.
4753 * npp == NULL	--> just do the lookup
4754 * *npp == NULL --> allocate a new nfsnode and make sure attributes are
4755 *			handled too
4756 * *npp != NULL --> update the file handle in the vnode
4757 */
4758int
4759nfs_lookitup(
4760	nfsnode_t dnp,
4761	char *name,
4762	int namelen,
4763	vfs_context_t ctx,
4764	nfsnode_t *npp)
4765{
4766	int error = 0;
4767	nfsnode_t np, newnp = NULL;
4768	u_int64_t xid;
4769	fhandle_t fh;
4770	struct nfsmount *nmp;
4771	struct nfs_vattr nvattr;
4772	struct nfsreq rq, *req = &rq;
4773
4774	nmp = NFSTONMP(dnp);
4775	if (!nmp)
4776		return (ENXIO);
4777
4778	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
4779	    (namelen > (long)nmp->nm_fsattr.nfsa_maxname))
4780		return (ENAMETOOLONG);
4781
4782	/* check for lookup of "." */
4783	if ((name[0] == '.') && (namelen == 1)) {
4784		/* skip lookup, we know who we are */
4785		fh.fh_len = 0;
4786		newnp = dnp;
4787		goto nfsmout;
4788	}
4789
4790	error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, name, namelen, ctx, &req);
4791	nfsmout_if(error);
4792	error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, ctx, req, &xid, &fh, &nvattr);
4793	nfsmout_if(!npp || error);
4794
4795	if (*npp) {
4796		np = *npp;
4797		if (fh.fh_len != np->n_fhsize) {
4798			u_char *oldbuf = (np->n_fhsize > NFS_SMALLFH) ? np->n_fhp : NULL;
4799			if (fh.fh_len > NFS_SMALLFH) {
4800				MALLOC_ZONE(np->n_fhp, u_char *, fh.fh_len, M_NFSBIGFH, M_WAITOK);
4801				if (!np->n_fhp) {
4802				    np->n_fhp = oldbuf;
4803				    error = ENOMEM;
4804				    goto nfsmout;
4805				}
4806			} else {
4807				np->n_fhp = &np->n_fh[0];
4808			}
4809			if (oldbuf)
4810				FREE_ZONE(oldbuf, np->n_fhsize, M_NFSBIGFH);
4811		}
4812		bcopy(fh.fh_data, np->n_fhp, fh.fh_len);
4813		np->n_fhsize = fh.fh_len;
4814		error = nfs_loadattrcache(np, &nvattr, &xid, 0);
4815		nfsmout_if(error);
4816		newnp = np;
4817	} else if (NFS_CMPFH(dnp, fh.fh_data, fh.fh_len)) {
4818		if (dnp->n_xid <= xid)
4819			error = nfs_loadattrcache(dnp, &nvattr, &xid, 0);
4820		nfsmout_if(error);
4821		newnp = dnp;
4822	} else {
4823		struct componentname cn, *cnp = &cn;
4824		bzero(cnp, sizeof(*cnp));
4825		cnp->cn_nameptr = name;
4826		cnp->cn_namelen = namelen;
4827		error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len,
4828			    &nvattr, &xid, NG_MAKEENTRY, &np);
4829		nfsmout_if(error);
4830		newnp = np;
4831	}
4832
4833nfsmout:
4834	if (npp && !*npp && !error)
4835		*npp = newnp;
4836	return (error);
4837}
4838
4839/*
4840 * set up and initialize a "._" file lookup structure used for
4841 * performing async lookups.
4842 */
4843void
4844nfs_dulookup_init(struct nfs_dulookup *dulp, nfsnode_t dnp, const char *name, int namelen)
4845{
4846	int error, du_namelen;
4847	vnode_t du_vp;
4848
4849	/* check for ._ file in name cache */
4850	dulp->du_flags = 0;
4851	bzero(&dulp->du_cn, sizeof(dulp->du_cn));
4852	du_namelen = namelen + 2;
4853	if ((namelen >= 2) && (name[0] == '.') && (name[1] == '_'))
4854		return;
4855	if (du_namelen >= (int)sizeof(dulp->du_smallname))
4856		MALLOC(dulp->du_cn.cn_nameptr, char *, du_namelen + 1, M_TEMP, M_WAITOK);
4857	else
4858		dulp->du_cn.cn_nameptr = dulp->du_smallname;
4859	if (!dulp->du_cn.cn_nameptr)
4860		return;
4861	dulp->du_cn.cn_namelen = du_namelen;
4862	snprintf(dulp->du_cn.cn_nameptr, du_namelen + 1, "._%s", name);
4863	dulp->du_cn.cn_nameptr[du_namelen] = '\0';
4864
4865	error = cache_lookup(NFSTOV(dnp), &du_vp, &dulp->du_cn);
4866	if (error == -1)
4867		vnode_put(du_vp);
4868	else if (!error)
4869		dulp->du_flags |= NFS_DULOOKUP_DOIT;
4870	else if (dulp->du_cn.cn_nameptr != dulp->du_smallname)
4871		FREE(dulp->du_cn.cn_nameptr, M_TEMP);
4872}
4873
4874/*
4875 * start an async "._" file lookup request
4876 */
4877void
4878nfs_dulookup_start(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
4879{
4880	struct nfsmount *nmp = NFSTONMP(dnp);
4881	struct nfsreq *req = &dulp->du_req;
4882
4883	if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_DOIT))
4884		return;
4885	if (!nmp->nm_funcs->nf_lookup_rpc_async(dnp, dulp->du_cn.cn_nameptr,
4886			dulp->du_cn.cn_namelen, ctx, &req))
4887		dulp->du_flags |= NFS_DULOOKUP_INPROG;
4888}
4889
4890/*
4891 * finish an async "._" file lookup request and clean up the structure
4892 */
4893void
4894nfs_dulookup_finish(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx)
4895{
4896	struct nfsmount *nmp = NFSTONMP(dnp);
4897	int error;
4898	nfsnode_t du_np;
4899	u_int64_t xid;
4900	fhandle_t fh;
4901	struct nfs_vattr nvattr;
4902
4903	if (!nmp || !(dulp->du_flags & NFS_DULOOKUP_INPROG))
4904		goto out;
4905
4906	error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, ctx, &dulp->du_req, &xid, &fh, &nvattr);
4907	dulp->du_flags &= ~NFS_DULOOKUP_INPROG;
4908	if (error == ENOENT) {
4909		/* add a negative entry in the name cache */
4910		cache_enter(NFSTOV(dnp), NULL, &dulp->du_cn);
4911		dnp->n_flag |= NNEGNCENTRIES;
4912	} else if (!error) {
4913		error = nfs_nget(NFSTOMP(dnp), dnp, &dulp->du_cn, fh.fh_data, fh.fh_len,
4914			    &nvattr, &xid, NG_MAKEENTRY, &du_np);
4915		if (!error) {
4916			nfs_unlock(du_np);
4917			vnode_put(NFSTOV(du_np));
4918		}
4919	}
4920out:
4921	if (dulp->du_flags & NFS_DULOOKUP_INPROG)
4922		nfs_request_async_cancel(&dulp->du_req);
4923	if (dulp->du_cn.cn_nameptr && (dulp->du_cn.cn_nameptr != dulp->du_smallname))
4924		FREE(dulp->du_cn.cn_nameptr, M_TEMP);
4925}
4926
4927
4928/*
4929 * NFS Version 3 commit RPC
4930 */
4931int
4932nfs3_commit_rpc(
4933	nfsnode_t np,
4934	u_int64_t offset,
4935	u_int64_t count,
4936	kauth_cred_t cred)
4937{
4938	struct nfsmount *nmp;
4939	int error = 0, lockerror, status, wccpostattr = 0, nfsvers;
4940	struct timespec premtime = { 0, 0 };
4941	u_int64_t xid, wverf;
4942	uint32_t count32;
4943	struct nfsm_chain nmreq, nmrep;
4944
4945	nmp = NFSTONMP(np);
4946	FSDBG(521, np, offset, count, nmp ? nmp->nm_state : 0);
4947	if (!nmp)
4948		return (ENXIO);
4949	if (!(nmp->nm_state & NFSSTA_HASWRITEVERF))
4950		return (0);
4951	nfsvers = nmp->nm_vers;
4952
4953	if (count > UINT32_MAX)
4954		count32 = 0;
4955	else
4956		count32 = count;
4957
4958	nfsm_chain_null(&nmreq);
4959	nfsm_chain_null(&nmrep);
4960
4961	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3));
4962	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
4963	nfsm_chain_add_64(error, &nmreq, offset);
4964	nfsm_chain_add_32(error, &nmreq, count32);
4965	nfsm_chain_build_done(error, &nmreq);
4966	nfsmout_if(error);
4967	error = nfs_request2(np, NULL, &nmreq, NFSPROC_COMMIT,
4968			current_thread(), cred, 0, &nmrep, &xid, &status);
4969	if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
4970		error = lockerror;
4971	/* can we do anything useful with the wcc info? */
4972	nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
4973	if (!lockerror)
4974		nfs_unlock(np);
4975	if (!error)
4976		error = status;
4977	nfsm_chain_get_64(error, &nmrep, wverf);
4978	nfsmout_if(error);
4979	lck_mtx_lock(&nmp->nm_lock);
4980	if (nmp->nm_verf != wverf) {
4981		nmp->nm_verf = wverf;
4982		error = NFSERR_STALEWRITEVERF;
4983	}
4984	lck_mtx_unlock(&nmp->nm_lock);
4985nfsmout:
4986	nfsm_chain_cleanup(&nmreq);
4987	nfsm_chain_cleanup(&nmrep);
4988	return (error);
4989}
4990
4991
4992static int
4993nfs_vnop_blockmap(
4994	__unused struct vnop_blockmap_args /* {
4995		struct vnodeop_desc *a_desc;
4996		vnode_t a_vp;
4997		off_t a_foffset;
4998		size_t a_size;
4999		daddr64_t *a_bpn;
5000		size_t *a_run;
5001		void *a_poff;
5002		int a_flags;
5003	} */ *ap)
5004{
5005	return (ENOTSUP);
5006}
5007
5008/*
5009 * Mmap a file
5010 *
5011 * NB Currently unsupported.
5012 */
5013/*ARGSUSED*/
5014static int
5015nfs_vnop_mmap(
5016	__unused struct vnop_mmap_args /* {
5017		struct vnodeop_desc *a_desc;
5018		vnode_t a_vp;
5019		int a_fflags;
5020		vfs_context_t a_context;
5021	} */ *ap)
5022{
5023	return (EINVAL);
5024}
5025
5026/*
5027 * fsync vnode op. Just call nfs_flush().
5028 */
5029/* ARGSUSED */
5030static int
5031nfs_vnop_fsync(
5032	struct vnop_fsync_args /* {
5033		struct vnodeop_desc *a_desc;
5034		vnode_t a_vp;
5035		int a_waitfor;
5036		vfs_context_t a_context;
5037	} */ *ap)
5038{
5039	return (nfs_flush(VTONFS(ap->a_vp), ap->a_waitfor, vfs_context_thread(ap->a_context), 0));
5040}
5041
5042
5043/*
5044 * Do an NFS pathconf RPC.
5045 */
5046int
5047nfs3_pathconf_rpc(
5048	nfsnode_t np,
5049	struct nfs_fsattr *nfsap,
5050	vfs_context_t ctx)
5051{
5052	u_int64_t xid;
5053	int error = 0, lockerror, status, nfsvers;
5054	struct nfsm_chain nmreq, nmrep;
5055	struct nfsmount *nmp = NFSTONMP(np);
5056	uint32_t val = 0;
5057
5058	if (!nmp)
5059		return (ENXIO);
5060	nfsvers = nmp->nm_vers;
5061
5062	nfsm_chain_null(&nmreq);
5063	nfsm_chain_null(&nmrep);
5064
5065	/* fetch pathconf info from server */
5066	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(NFS_VER3));
5067	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
5068	nfsm_chain_build_done(error, &nmreq);
5069	nfsmout_if(error);
5070	error = nfs_request(np, NULL, &nmreq, NFSPROC_PATHCONF, ctx,
5071			&nmrep, &xid, &status);
5072	if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
5073		error = lockerror;
5074	nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
5075	if (!lockerror)
5076		nfs_unlock(np);
5077	if (!error)
5078		error = status;
5079	nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxlink);
5080	nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxname);
5081	nfsm_chain_get_32(error, &nmrep, val);
5082	if (val)
5083		nfsap->nfsa_flags |= NFS_FSFLAG_NO_TRUNC;
5084	nfsm_chain_get_32(error, &nmrep, val);
5085	if (val)
5086		nfsap->nfsa_flags |= NFS_FSFLAG_CHOWN_RESTRICTED;
5087	nfsm_chain_get_32(error, &nmrep, val);
5088	if (val)
5089		nfsap->nfsa_flags |= NFS_FSFLAG_CASE_INSENSITIVE;
5090	nfsm_chain_get_32(error, &nmrep, val);
5091	if (val)
5092		nfsap->nfsa_flags |= NFS_FSFLAG_CASE_PRESERVING;
5093	NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_MAXLINK);
5094	NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_MAXNAME);
5095	NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_NO_TRUNC);
5096	NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED);
5097	NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE);
5098	NFS_BITMAP_SET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_PRESERVING);
5099nfsmout:
5100	nfsm_chain_cleanup(&nmreq);
5101	nfsm_chain_cleanup(&nmrep);
5102	return (error);
5103}
5104
5105/* save pathconf info for NFSv3 mount */
5106void
5107nfs3_pathconf_cache(struct nfsmount *nmp, struct nfs_fsattr *nfsap)
5108{
5109	nmp->nm_fsattr.nfsa_maxlink = nfsap->nfsa_maxlink;
5110	nmp->nm_fsattr.nfsa_maxname = nfsap->nfsa_maxname;
5111	nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_NO_TRUNC;
5112	nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED;
5113	nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE;
5114	nmp->nm_fsattr.nfsa_flags |= nfsap->nfsa_flags & NFS_FSFLAG_CASE_PRESERVING;
5115	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXLINK);
5116	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME);
5117	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_NO_TRUNC);
5118	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED);
5119	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE);
5120	NFS_BITMAP_SET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_PRESERVING);
5121	nmp->nm_state |= NFSSTA_GOTPATHCONF;
5122}
5123
5124/*
5125 * Return POSIX pathconf information applicable to nfs.
5126 *
5127 * The NFS V2 protocol doesn't support this, so just return EINVAL
5128 * for V2.
5129 */
5130/* ARGSUSED */
5131static int
5132nfs_vnop_pathconf(
5133	struct vnop_pathconf_args /* {
5134		struct vnodeop_desc *a_desc;
5135		vnode_t a_vp;
5136		int a_name;
5137		register_t *a_retval;
5138		vfs_context_t a_context;
5139	} */ *ap)
5140{
5141	vnode_t vp = ap->a_vp;
5142	nfsnode_t np = VTONFS(vp);
5143	struct nfsmount *nmp;
5144	struct nfs_fsattr nfsa, *nfsap;
5145	int error = 0;
5146	uint64_t maxFileSize;
5147	uint nbits;
5148
5149	nmp = VTONMP(vp);
5150	if (!nmp)
5151		return (ENXIO);
5152
5153	switch (ap->a_name) {
5154	case _PC_LINK_MAX:
5155	case _PC_NAME_MAX:
5156	case _PC_CHOWN_RESTRICTED:
5157	case _PC_NO_TRUNC:
5158	case _PC_CASE_SENSITIVE:
5159	case _PC_CASE_PRESERVING:
5160		break;
5161	case _PC_FILESIZEBITS:
5162		if (nmp->nm_vers == NFS_VER2) {
5163			*ap->a_retval = 32;
5164			return (0);
5165		}
5166		break;
5167	default:
5168		/* don't bother contacting the server if we know the answer */
5169		return (EINVAL);
5170	}
5171
5172	if (nmp->nm_vers == NFS_VER2)
5173		return (EINVAL);
5174
5175	lck_mtx_lock(&nmp->nm_lock);
5176	if (nmp->nm_vers == NFS_VER3) {
5177		if (!(nmp->nm_state & NFSSTA_GOTPATHCONF)) {
5178			/* no pathconf info cached */
5179			lck_mtx_unlock(&nmp->nm_lock);
5180			NFS_CLEAR_ATTRIBUTES(nfsa.nfsa_bitmap);
5181			error = nfs3_pathconf_rpc(np, &nfsa, ap->a_context);
5182			if (error)
5183				return (error);
5184			nmp = VTONMP(vp);
5185			if (!nmp)
5186				return (ENXIO);
5187			lck_mtx_lock(&nmp->nm_lock);
5188			if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS) {
5189				/* all files have the same pathconf info, */
5190				/* so cache a copy of the results */
5191				nfs3_pathconf_cache(nmp, &nfsa);
5192			}
5193			nfsap = &nfsa;
5194		} else {
5195			nfsap = &nmp->nm_fsattr;
5196		}
5197	} else if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS)) {
5198		/* no pathconf info cached */
5199		lck_mtx_unlock(&nmp->nm_lock);
5200		NFS_CLEAR_ATTRIBUTES(nfsa.nfsa_bitmap);
5201		error = nfs4_pathconf_rpc(np, &nfsa, ap->a_context);
5202		if (error)
5203			return (error);
5204		nmp = VTONMP(vp);
5205		if (!nmp)
5206			return (ENXIO);
5207		lck_mtx_lock(&nmp->nm_lock);
5208		nfsap = &nfsa;
5209	} else {
5210		nfsap = &nmp->nm_fsattr;
5211	}
5212
5213	switch (ap->a_name) {
5214	case _PC_LINK_MAX:
5215		if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXLINK))
5216			*ap->a_retval = nfsap->nfsa_maxlink;
5217		else if ((nmp->nm_vers == NFS_VER4) && NFS_BITMAP_ISSET(np->n_vattr.nva_bitmap, NFS_FATTR_MAXLINK))
5218			*ap->a_retval = np->n_vattr.nva_maxlink;
5219		else
5220			error = EINVAL;
5221		break;
5222	case _PC_NAME_MAX:
5223		if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXNAME))
5224			*ap->a_retval = nfsap->nfsa_maxname;
5225		else
5226			error = EINVAL;
5227		break;
5228	case _PC_CHOWN_RESTRICTED:
5229		if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED))
5230			*ap->a_retval = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED) ? 200112 /* _POSIX_CHOWN_RESTRICTED */ : 0;
5231		else
5232			error = EINVAL;
5233		break;
5234	case _PC_NO_TRUNC:
5235		if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_NO_TRUNC))
5236			*ap->a_retval = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NO_TRUNC) ? 200112 /* _POSIX_NO_TRUNC */ : 0;
5237		else
5238			error = EINVAL;
5239		break;
5240	case _PC_CASE_SENSITIVE:
5241		if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE))
5242			*ap->a_retval = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE) ? 0 : 1;
5243		else
5244			error = EINVAL;
5245		break;
5246	case _PC_CASE_PRESERVING:
5247		if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_PRESERVING))
5248			*ap->a_retval = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_PRESERVING) ? 1 : 0;
5249		else
5250			error = EINVAL;
5251		break;
5252	case _PC_FILESIZEBITS:
5253		if (!NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) {
5254			*ap->a_retval = 64;
5255			error = 0;
5256			break;
5257		}
5258		maxFileSize = nmp->nm_fsattr.nfsa_maxfilesize;
5259		nbits = 1;
5260		if (maxFileSize & 0xffffffff00000000ULL) {
5261			nbits += 32;
5262			maxFileSize >>= 32;
5263		}
5264		if (maxFileSize & 0xffff0000) {
5265			nbits += 16;
5266			maxFileSize >>= 16;
5267		}
5268		if (maxFileSize & 0xff00) {
5269			nbits += 8;
5270			maxFileSize >>= 8;
5271		}
5272		if (maxFileSize & 0xf0) {
5273			nbits += 4;
5274			maxFileSize >>= 4;
5275		}
5276		if (maxFileSize & 0xc) {
5277			nbits += 2;
5278			maxFileSize >>= 2;
5279		}
5280		if (maxFileSize & 0x2) {
5281			nbits += 1;
5282		}
5283		*ap->a_retval = nbits;
5284		break;
5285	default:
5286		error = EINVAL;
5287	}
5288
5289	lck_mtx_unlock(&nmp->nm_lock);
5290
5291	return (error);
5292}
5293
5294/*
5295 * Read wrapper for special devices.
5296 */
5297static int
5298nfsspec_vnop_read(
5299	struct vnop_read_args /* {
5300		struct vnodeop_desc *a_desc;
5301		vnode_t a_vp;
5302		struct uio *a_uio;
5303		int a_ioflag;
5304		vfs_context_t a_context;
5305	} */ *ap)
5306{
5307	nfsnode_t np = VTONFS(ap->a_vp);
5308	struct timeval now;
5309	int error;
5310
5311	/*
5312	 * Set access flag.
5313	 */
5314	if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
5315		return (error);
5316	np->n_flag |= NACC;
5317	microtime(&now);
5318	np->n_atim.tv_sec = now.tv_sec;
5319	np->n_atim.tv_nsec = now.tv_usec * 1000;
5320	nfs_unlock(np);
5321	return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_read), ap));
5322}
5323
5324/*
5325 * Write wrapper for special devices.
5326 */
5327static int
5328nfsspec_vnop_write(
5329	struct vnop_write_args /* {
5330		struct vnodeop_desc *a_desc;
5331		vnode_t a_vp;
5332		struct uio *a_uio;
5333		int a_ioflag;
5334		vfs_context_t a_context;
5335	} */ *ap)
5336{
5337	nfsnode_t np = VTONFS(ap->a_vp);
5338	struct timeval now;
5339	int error;
5340
5341	/*
5342	 * Set update flag.
5343	 */
5344	if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
5345		return (error);
5346	np->n_flag |= NUPD;
5347	microtime(&now);
5348	np->n_mtim.tv_sec = now.tv_sec;
5349	np->n_mtim.tv_nsec = now.tv_usec * 1000;
5350	nfs_unlock(np);
5351	return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_write), ap));
5352}
5353
5354/*
5355 * Close wrapper for special devices.
5356 *
5357 * Update the times on the nfsnode then do device close.
5358 */
5359static int
5360nfsspec_vnop_close(
5361	struct vnop_close_args /* {
5362		struct vnodeop_desc *a_desc;
5363		vnode_t a_vp;
5364		int a_fflag;
5365		vfs_context_t a_context;
5366	} */ *ap)
5367{
5368	vnode_t vp = ap->a_vp;
5369	nfsnode_t np = VTONFS(vp);
5370	struct vnode_attr vattr;
5371	mount_t mp;
5372	int error;
5373
5374	if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
5375		return (error);
5376	if (np->n_flag & (NACC | NUPD)) {
5377		np->n_flag |= NCHG;
5378		if (!vnode_isinuse(vp, 1) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) {
5379			VATTR_INIT(&vattr);
5380			if (np->n_flag & NACC) {
5381				vattr.va_access_time = np->n_atim;
5382				VATTR_SET_ACTIVE(&vattr, va_access_time);
5383			}
5384			if (np->n_flag & NUPD) {
5385				vattr.va_modify_time = np->n_mtim;
5386				VATTR_SET_ACTIVE(&vattr, va_modify_time);
5387			}
5388			nfs_unlock(np);
5389			vnode_setattr(vp, &vattr, ap->a_context);
5390		} else {
5391			nfs_unlock(np);
5392		}
5393	} else {
5394		nfs_unlock(np);
5395	}
5396	return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_close), ap));
5397}
5398
5399#if FIFO
5400extern vnop_t **fifo_vnodeop_p;
5401
5402/*
5403 * Read wrapper for fifos.
5404 */
5405static int
5406nfsfifo_vnop_read(
5407	struct vnop_read_args /* {
5408		struct vnodeop_desc *a_desc;
5409		vnode_t a_vp;
5410		struct uio *a_uio;
5411		int a_ioflag;
5412		vfs_context_t a_context;
5413	} */ *ap)
5414{
5415	nfsnode_t np = VTONFS(ap->a_vp);
5416	struct timeval now;
5417	int error;
5418
5419	/*
5420	 * Set access flag.
5421	 */
5422	if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
5423		return (error);
5424	np->n_flag |= NACC;
5425	microtime(&now);
5426	np->n_atim.tv_sec = now.tv_sec;
5427	np->n_atim.tv_nsec = now.tv_usec * 1000;
5428	nfs_unlock(np);
5429	return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_read), ap));
5430}
5431
5432/*
5433 * Write wrapper for fifos.
5434 */
5435static int
5436nfsfifo_vnop_write(
5437	struct vnop_write_args /* {
5438		struct vnodeop_desc *a_desc;
5439		vnode_t a_vp;
5440		struct uio *a_uio;
5441		int a_ioflag;
5442		vfs_context_t a_context;
5443	} */ *ap)
5444{
5445	nfsnode_t np = VTONFS(ap->a_vp);
5446	struct timeval now;
5447	int error;
5448
5449	/*
5450	 * Set update flag.
5451	 */
5452	if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
5453		return (error);
5454	np->n_flag |= NUPD;
5455	microtime(&now);
5456	np->n_mtim.tv_sec = now.tv_sec;
5457	np->n_mtim.tv_nsec = now.tv_usec * 1000;
5458	nfs_unlock(np);
5459	return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_write), ap));
5460}
5461
5462/*
5463 * Close wrapper for fifos.
5464 *
5465 * Update the times on the nfsnode then do fifo close.
5466 */
5467static int
5468nfsfifo_vnop_close(
5469	struct vnop_close_args /* {
5470		struct vnodeop_desc *a_desc;
5471		vnode_t a_vp;
5472		int a_fflag;
5473		vfs_context_t a_context;
5474	} */ *ap)
5475{
5476	vnode_t vp = ap->a_vp;
5477	nfsnode_t np = VTONFS(vp);
5478	struct vnode_attr vattr;
5479	struct timeval now;
5480	mount_t mp;
5481	int error;
5482
5483	if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))
5484		return (error);
5485	if (np->n_flag & (NACC | NUPD)) {
5486		microtime(&now);
5487		if (np->n_flag & NACC) {
5488			np->n_atim.tv_sec = now.tv_sec;
5489			np->n_atim.tv_nsec = now.tv_usec * 1000;
5490		}
5491		if (np->n_flag & NUPD) {
5492			np->n_mtim.tv_sec = now.tv_sec;
5493			np->n_mtim.tv_nsec = now.tv_usec * 1000;
5494		}
5495		np->n_flag |= NCHG;
5496		if (!vnode_isinuse(vp, 1) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) {
5497			VATTR_INIT(&vattr);
5498			if (np->n_flag & NACC) {
5499				vattr.va_access_time = np->n_atim;
5500				VATTR_SET_ACTIVE(&vattr, va_access_time);
5501			}
5502			if (np->n_flag & NUPD) {
5503				vattr.va_modify_time = np->n_mtim;
5504				VATTR_SET_ACTIVE(&vattr, va_modify_time);
5505			}
5506			nfs_unlock(np);
5507			vnode_setattr(vp, &vattr, ap->a_context);
5508		} else {
5509			nfs_unlock(np);
5510		}
5511	} else {
5512		nfs_unlock(np);
5513	}
5514	return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_close), ap));
5515}
5516#endif /* FIFO */
5517
5518/*ARGSUSED*/
5519static int
5520nfs_vnop_ioctl(
5521	__unused struct vnop_ioctl_args /* {
5522		struct vnodeop_desc *a_desc;
5523		vnode_t a_vp;
5524		u_long a_command;
5525		caddr_t a_data;
5526		int a_fflag;
5527		vfs_context_t a_context;
5528	} */ *ap)
5529{
5530
5531	/*
5532	 * XXX we were once bogusly enoictl() which returned this (ENOTTY).
5533	 * Probably we should return ENODEV.
5534	 */
5535	return (ENOTTY);
5536}
5537
5538/*ARGSUSED*/
5539static int
5540nfs_vnop_select(
5541	__unused struct vnop_select_args /* {
5542		struct vnodeop_desc *a_desc;
5543		vnode_t a_vp;
5544		int a_which;
5545		int a_fflags;
5546		void *a_wql;
5547		vfs_context_t a_context;
5548	} */ *ap)
5549{
5550
5551	/*
5552	 * We were once bogusly seltrue() which returns 1.  Is this right?
5553	 */
5554	return (1);
5555}
5556
5557/*
5558 * vnode OP for pagein using UPL
5559 *
5560 * No buffer I/O, just RPCs straight into the mapped pages.
5561 */
5562static int
5563nfs_vnop_pagein(
5564	struct vnop_pagein_args /* {
5565		struct vnodeop_desc *a_desc;
5566		vnode_t a_vp;
5567		upl_t a_pl;
5568		vm_offset_t a_pl_offset;
5569		off_t a_f_offset;
5570		size_t a_size;
5571		int a_flags;
5572		vfs_context_t a_context;
5573	} */ *ap)
5574{
5575	vnode_t vp = ap->a_vp;
5576	upl_t pl = ap->a_pl;
5577	size_t size = ap->a_size;
5578	off_t f_offset = ap->a_f_offset;
5579	vm_offset_t pl_offset = ap->a_pl_offset;
5580	int flags = ap->a_flags;
5581	thread_t thd;
5582	kauth_cred_t cred;
5583	nfsnode_t np = VTONFS(vp);
5584	size_t nmrsize, iosize, txsize, rxsize, retsize;
5585	off_t txoffset;
5586	struct nfsmount *nmp;
5587	int error = 0;
5588	vm_offset_t ioaddr;
5589	struct uio	auio;
5590	struct iovec_32	aiov;
5591	struct uio * uio = &auio;
5592	int nofreeupl = flags & UPL_NOCOMMIT;
5593	upl_page_info_t *plinfo;
5594#define MAXPAGINGREQS	16	/* max outstanding RPCs for pagein/pageout */
5595	struct nfsreq *req[MAXPAGINGREQS];
5596	int nextsend, nextwait;
5597
5598	FSDBG(322, np, f_offset, size, flags);
5599	if (pl == (upl_t)NULL)
5600		panic("nfs_pagein: no upl");
5601
5602	if (size <= 0) {
5603		printf("nfs_pagein: invalid size %ld", size);
5604		if (!nofreeupl)
5605			(void) ubc_upl_abort(pl, 0);
5606		return (EINVAL);
5607	}
5608	if (f_offset < 0 || f_offset >= (off_t)np->n_size || (f_offset & PAGE_MASK_64)) {
5609		if (!nofreeupl)
5610			ubc_upl_abort_range(pl, pl_offset, size,
5611				UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
5612		return (EINVAL);
5613	}
5614
5615	thd = vfs_context_thread(ap->a_context);
5616	cred = ubc_getcred(vp);
5617	if (!IS_VALID_CRED(cred))
5618		cred = vfs_context_ucred(ap->a_context);
5619
5620	auio.uio_offset = f_offset;
5621#if 1   /* LP64todo - can't use new segment flags until the drivers are ready */
5622	auio.uio_segflg = UIO_SYSSPACE;
5623#else
5624	auio.uio_segflg = UIO_SYSSPACE32;
5625#endif
5626	auio.uio_rw = UIO_READ;
5627	auio.uio_procp = vfs_context_proc(ap->a_context);
5628
5629	nmp = VTONMP(vp);
5630	if (!nmp) {
5631		if (!nofreeupl)
5632			ubc_upl_abort_range(pl, pl_offset, size,
5633				UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY);
5634		return (ENXIO);
5635	}
5636	nmrsize = nmp->nm_rsize;
5637
5638	plinfo = ubc_upl_pageinfo(pl);
5639	ubc_upl_map(pl, &ioaddr);
5640	ioaddr += pl_offset;
5641	txsize = rxsize = size;
5642	txoffset = f_offset;
5643
5644	bzero(req, sizeof(req));
5645	nextsend = nextwait = 0;
5646	do {
5647		/* send requests while we need to and have available slots */
5648		while ((txsize > 0) && (req[nextsend] == NULL)) {
5649			iosize = MIN(nmrsize, txsize);
5650			if ((error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, iosize, thd, cred, NULL, &req[nextsend]))) {
5651				req[nextsend] = NULL;
5652				break;
5653			}
5654			txoffset += iosize;
5655			txsize -= iosize;
5656			nextsend = (nextsend + 1) % MAXPAGINGREQS;
5657		}
5658		/* wait while we need to and break out if more requests to send */
5659		while ((rxsize > 0) && req[nextwait]) {
5660			iosize = retsize = MIN(nmrsize, rxsize);
5661			aiov.iov_len  = iosize;
5662			aiov.iov_base = (uintptr_t)ioaddr;
5663			auio.uio_iovs.iov32p = &aiov;
5664			auio.uio_iovcnt = 1;
5665			uio_uio_resid_set(&auio, iosize);
5666			FSDBG(322, uio->uio_offset, uio_uio_resid(uio), ioaddr, rxsize);
5667#ifdef UPL_DEBUG
5668			upl_ubc_alias_set(pl, current_thread(), 2);
5669#endif /* UPL_DEBUG */
5670			OSAddAtomic(1, (SInt32*)&nfsstats.pageins);
5671			error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req[nextwait], uio, &retsize, NULL);
5672			req[nextwait] = NULL;
5673			nextwait = (nextwait + 1) % MAXPAGINGREQS;
5674			if (error) {
5675				FSDBG(322, uio->uio_offset, uio_uio_resid(uio), error, -1);
5676				break;
5677			}
5678			if (retsize < iosize) {
5679				/* Just zero fill the rest of the valid area. */
5680				// LP64todo - fix this
5681				int zcnt = iosize - retsize;
5682				bzero((char *)ioaddr + retsize, zcnt);
5683				FSDBG(324, uio->uio_offset, retsize, zcnt, ioaddr);
5684				uio->uio_offset += zcnt;
5685			}
5686			ioaddr += iosize;
5687			rxsize -= iosize;
5688			if (txsize)
5689				break;
5690		}
5691	} while (!error && (txsize || rxsize));
5692
5693	ubc_upl_unmap(pl);
5694
5695	if (error) {
5696		/* cancel any outstanding requests */
5697		while (req[nextwait]) {
5698			nfs_request_async_cancel(req[nextwait]);
5699			req[nextwait] = NULL;
5700			nextwait = (nextwait + 1) % MAXPAGINGREQS;
5701		}
5702	}
5703
5704	if (!nofreeupl) {
5705		if (error)
5706			ubc_upl_abort_range(pl, pl_offset, size,
5707					    UPL_ABORT_ERROR |
5708					    UPL_ABORT_FREE_ON_EMPTY);
5709		else
5710			ubc_upl_commit_range(pl, pl_offset, size,
5711					     UPL_COMMIT_CLEAR_DIRTY |
5712					     UPL_COMMIT_FREE_ON_EMPTY);
5713	}
5714	return (error);
5715}
5716
5717
/*
 * The following are needed only by nfs_pageout to know how to handle errors;
 * see the nfs_pageout comments for an explanation of the actions.
 * The errors here are copied from errno.h, and errors returned by servers
 * are expected to match the same numbers here. If they do not, our actions
 * may be erroneous.
 */
/* the kinds of action that can be associated with an error number */
enum actiontype {NOACTION, DUMP, DUMPANDLOG, RETRY, RETRYWITHSLEEP, SEVER};
/* highest error number covered by the per-error tables (indices 0..NFS_ELAST) */
#define NFS_ELAST 88
/* one counter per error number */
static u_char errorcount[NFS_ELAST+1]; /* better be zeros when initialized */
5728static const char errortooutcome[NFS_ELAST+1] = {
5729	NOACTION,
5730	DUMP,			/* EPERM	1	Operation not permitted */
5731	DUMP,			/* ENOENT	2	No such file or directory */
5732	DUMPANDLOG,		/* ESRCH	3	No such process */
5733	RETRY,			/* EINTR 	4	Interrupted system call */
5734	DUMP,			/* EIO		5	Input/output error */
5735	DUMP,			/* ENXIO	6	Device not configured */
5736	DUMPANDLOG,		/* E2BIG	7	Argument list too long */
5737	DUMPANDLOG,		/* ENOEXEC	8	Exec format error */
5738	DUMPANDLOG,		/* EBADF	9	Bad file descriptor */
5739	DUMPANDLOG,		/* ECHILD	10	No child processes */
5740	DUMPANDLOG,		/* EDEADLK	11	Resource deadlock avoided - was EAGAIN */
5741	RETRY,			/* ENOMEM	12	Cannot allocate memory */
5742	DUMP,			/* EACCES	13	Permission denied */
5743	DUMPANDLOG,		/* EFAULT	14	Bad address */
5744	DUMPANDLOG,		/* ENOTBLK	15	POSIX - Block device required */
5745	RETRY,			/* EBUSY	16	Device busy */
5746	DUMP,			/* EEXIST	17	File exists */
5747	DUMP,			/* EXDEV	18	Cross-device link */
5748	DUMP,			/* ENODEV	19	Operation not supported by device */
5749	DUMP,			/* ENOTDIR	20	Not a directory */
5750	DUMP,			/* EISDIR 	21	Is a directory */
5751	DUMP,			/* EINVAL	22	Invalid argument */
5752	DUMPANDLOG,		/* ENFILE	23	Too many open files in system */
5753	DUMPANDLOG,		/* EMFILE	24	Too many open files */
5754	DUMPANDLOG,		/* ENOTTY	25	Inappropriate ioctl for device */
5755	DUMPANDLOG,		/* ETXTBSY	26	Text file busy - POSIX */
5756	DUMP,			/* EFBIG	27	File too large */
5757	DUMP,			/* ENOSPC	28	No space left on device */
5758	DUMPANDLOG,		/* ESPIPE	29	Illegal seek */
5759	DUMP,			/* EROFS	30	Read-only file system */
5760	DUMP,			/* EMLINK	31	Too many links */
5761	RETRY,			/* EPIPE	32	Broken pipe */
5762	/* math software */
5763	DUMPANDLOG,		/* EDOM				33	Numerical argument out of domain */
5764	DUMPANDLOG,		/* ERANGE			34	Result too large */
5765	RETRY,			/* EAGAIN/EWOULDBLOCK	35	Resource temporarily unavailable */
5766	DUMPANDLOG,		/* EINPROGRESS		36	Operation now in progress */
5767	DUMPANDLOG,		/* EALREADY			37	Operation already in progress */
5768	/* ipc/network software -- argument errors */
5769	DUMPANDLOG,		/* ENOTSOC			38	Socket operation on non-socket */
5770	DUMPANDLOG,		/* EDESTADDRREQ		39	Destination address required */
5771	DUMPANDLOG,		/* EMSGSIZE			40	Message too long */
5772	DUMPANDLOG,		/* EPROTOTYPE		41	Protocol wrong type for socket */
5773	DUMPANDLOG,		/* ENOPROTOOPT		42	Protocol not available */
5774	DUMPANDLOG,		/* EPROTONOSUPPORT	43	Protocol not supported */
5775	DUMPANDLOG,		/* ESOCKTNOSUPPORT	44	Socket type not supported */
5776	DUMPANDLOG,		/* ENOTSUP			45	Operation not supported */
5777	DUMPANDLOG,		/* EPFNOSUPPORT		46	Protocol family not supported */
5778	DUMPANDLOG,		/* EAFNOSUPPORT		47	Address family not supported by protocol family */
5779	DUMPANDLOG,		/* EADDRINUSE		48	Address already in use */
5780	DUMPANDLOG,		/* EADDRNOTAVAIL	49	Can't assign requested address */
5781	/* ipc/network software -- operational errors */
5782	RETRY,			/* ENETDOWN			50	Network is down */
5783	RETRY,			/* ENETUNREACH		51	Network is unreachable */
5784	RETRY,			/* ENETRESET		52	Network dropped connection on reset */
5785	RETRY,			/* ECONNABORTED		53	Software caused connection abort */
5786	RETRY,			/* ECONNRESET		54	Connection reset by peer */
5787	RETRY,			/* ENOBUFS			55	No buffer space available */
5788	RETRY,			/* EISCONN			56	Socket is already connected */
5789	RETRY,			/* ENOTCONN			57	Socket is not connected */
5790	RETRY,			/* ESHUTDOWN		58	Can't send after socket shutdown */
5791	RETRY,			/* ETOOMANYREFS		59	Too many references: can't splice */
5792	RETRY,			/* ETIMEDOUT		60	Operation timed out */
5793	RETRY,			/* ECONNREFUSED		61	Connection refused */
5794
5795	DUMPANDLOG,		/* ELOOP			62	Too many levels of symbolic links */
5796	DUMP,			/* ENAMETOOLONG		63	File name too long */
5797	RETRY,			/* EHOSTDOWN		64	Host is down */
5798	RETRY,			/* EHOSTUNREACH		65	No route to host */
5799	DUMP,			/* ENOTEMPTY		66	Directory not empty */
5800	/* quotas & mush */
5801	DUMPANDLOG,		/* PROCLIM			67	Too many processes */
5802	DUMPANDLOG,		/* EUSERS			68	Too many users */
5803	DUMPANDLOG,		/* EDQUOT			69	Disc quota exceeded */
5804	/* Network File System */
5805	DUMP,			/* ESTALE			70	Stale NFS file handle */
5806	DUMP,			/* EREMOTE			71	Too many levels of remote in path */
5807	DUMPANDLOG,		/* EBADRPC			72	RPC struct is bad */
5808	DUMPANDLOG,		/* ERPCMISMATCH		73	RPC version wrong */
5809	DUMPANDLOG,		/* EPROGUNAVAIL		74	RPC prog. not avail */
5810	DUMPANDLOG,		/* EPROGMISMATCH	75	Program version wrong */
5811	DUMPANDLOG,		/* EPROCUNAVAIL		76	Bad procedure for program */
5812
5813	DUMPANDLOG,		/* ENOLCK			77	No locks available */
5814	DUMPANDLOG,		/* ENOSYS			78	Function not implemented */
5815	DUMPANDLOG,		/* EFTYPE			79	Inappropriate file type or format */
5816	DUMPANDLOG,		/* EAUTH			80	Authentication error */
5817	DUMPANDLOG,		/* ENEEDAUTH		81	Need authenticator */
5818	/* Intelligent device errors */
5819	DUMPANDLOG,		/* EPWROFF			82	Device power is off */
5820	DUMPANDLOG,		/* EDEVERR			83	Device error, e.g. paper out */
5821	DUMPANDLOG,		/* EOVERFLOW		84	Value too large to be stored in data type */
5822	/* Program loading errors */
5823	DUMPANDLOG,		/* EBADEXEC			85	Bad executable */
5824	DUMPANDLOG,		/* EBADARCH			86	Bad CPU type in executable */
5825	DUMPANDLOG,		/* ESHLIBVERS		87	Shared library version mismatch */
5826	DUMPANDLOG,		/* EBADMACHO		88	Malformed Macho file */
5827};
5828
5829static char
5830nfs_pageouterrorhandler(int error)
5831{
5832	if (error > NFS_ELAST)
5833		return(DUMP);
5834	else
5835		return(errortooutcome[error]);
5836}
5837
5838
5839/*
5840 * vnode OP for pageout using UPL
5841 *
5842 * No buffer I/O, just RPCs straight from the mapped pages.
5843 * File size changes are not permitted in pageout.
5844 */
5845static int
5846nfs_vnop_pageout(
5847	struct vnop_pageout_args /* {
5848		struct vnodeop_desc *a_desc;
5849		vnode_t a_vp;
5850		upl_t a_pl;
5851		vm_offset_t a_pl_offset;
5852		off_t a_f_offset;
5853		size_t a_size;
5854		int a_flags;
5855		vfs_context_t a_context;
5856	} */ *ap)
5857{
5858	vnode_t vp = ap->a_vp;
5859	upl_t pl = ap->a_pl;
5860	size_t size = ap->a_size;
5861	off_t f_offset = ap->a_f_offset;
5862	vm_offset_t pl_offset = ap->a_pl_offset;
5863	int flags = ap->a_flags;
5864	nfsnode_t np = VTONFS(vp);
5865	thread_t thd;
5866	kauth_cred_t cred;
5867	struct nfsbuf *bp;
5868	struct nfsmount *nmp = VTONMP(vp);
5869	daddr64_t lbn;
5870	int error = 0, iomode;
5871	off_t off, txoffset, rxoffset;
5872	vm_offset_t ioaddr, txaddr, rxaddr;
5873	struct uio	auio;
5874	struct iovec_32	aiov;
5875	int nofreeupl = flags & UPL_NOCOMMIT;
5876	size_t nmwsize, biosize, iosize, pgsize, txsize, rxsize, xsize, remsize;
5877	struct nfsreq *req[MAXPAGINGREQS];
5878	int nextsend, nextwait, wverfset, commit, restart = 0;
5879	uint64_t wverf, wverf2;
5880
5881	FSDBG(323, f_offset, size, pl, pl_offset);
5882
5883	if (pl == (upl_t)NULL)
5884		panic("nfs_pageout: no upl");
5885
5886	if (size <= 0) {
5887		printf("nfs_pageout: invalid size %ld", size);
5888		if (!nofreeupl)
5889			ubc_upl_abort(pl, 0);
5890		return (EINVAL);
5891	}
5892
5893	if (!nmp) {
5894		if (!nofreeupl)
5895			ubc_upl_abort(pl, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY);
5896		return (ENXIO);
5897	}
5898	biosize = nmp->nm_biosize;
5899	nmwsize = nmp->nm_wsize;
5900
5901	nfs_data_lock2(np, NFS_NODE_LOCK_SHARED, 0);
5902
5903	/*
5904	 * Check to see whether the buffer is incore.
5905	 * If incore and not busy, invalidate it from the cache.
5906	 */
5907	for (iosize = 0; iosize < size; iosize += xsize) {
5908		off = f_offset + iosize;
5909		/* need make sure we do things on block boundaries */
5910		xsize = biosize - (off % biosize);
5911		if (off + xsize > f_offset + size)
5912			xsize = f_offset + size - off;
5913		lbn = (daddr64_t)(off / biosize);
5914		lck_mtx_lock(nfs_buf_mutex);
5915		if ((bp = nfs_buf_incore(np, lbn))) {
5916			FSDBG(323, off, bp, bp->nb_lflags, bp->nb_flags);
5917			if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) {
5918				lck_mtx_unlock(nfs_buf_mutex);
5919				nfs_data_unlock2(np, 0);
5920				/* no panic. just tell vm we are busy */
5921				if (!nofreeupl)
5922					ubc_upl_abort(pl, 0);
5923				return (EBUSY);
5924			}
5925			if (bp->nb_dirtyend > 0) {
5926				/*
5927				 * if there's a dirty range in the buffer, check
5928				 * to see if it extends beyond the pageout region
5929				 *
5930				 * if the dirty region lies completely within the
5931				 * pageout region, we just invalidate the buffer
5932				 * because it's all being written out now anyway.
5933				 *
5934				 * if any of the dirty region lies outside the
5935				 * pageout region, we'll try to clip the dirty
5936				 * region to eliminate the portion that's being
5937				 * paged out.  If that's not possible, because
5938				 * the dirty region extends before and after the
5939				 * pageout region, then we'll just return EBUSY.
5940				 */
5941				off_t boff, start, end;
5942				boff = NBOFF(bp);
5943				start = off;
5944				end = off + xsize;
5945				/* clip end to EOF */
5946				if (end > (off_t)np->n_size)
5947					end = np->n_size;
5948				start -= boff;
5949				end -= boff;
5950				if ((bp->nb_dirtyoff < start) &&
5951				    (bp->nb_dirtyend > end)) {
5952				    /* not gonna be able to clip the dirty region */
5953				    FSDBG(323, np, bp, 0xd00deebc, EBUSY);
5954				    nfs_buf_drop(bp);
5955				    lck_mtx_unlock(nfs_buf_mutex);
5956				    nfs_data_unlock2(np, 0);
5957				    if (!nofreeupl)
5958					ubc_upl_abort(pl, 0);
5959				    return (EBUSY);
5960				}
5961				if ((bp->nb_dirtyoff < start) ||
5962				    (bp->nb_dirtyend > end)) {
5963				    /* clip dirty region, if necessary */
5964				    if (bp->nb_dirtyoff < start)
5965					bp->nb_dirtyend = min(bp->nb_dirtyend, start);
5966				    if (bp->nb_dirtyend > end)
5967					bp->nb_dirtyoff = max(bp->nb_dirtyoff, end);
5968				    FSDBG(323, bp, bp->nb_dirtyoff, bp->nb_dirtyend, 0xd00dee00);
5969				    /* we're leaving this block dirty */
5970				    nfs_buf_drop(bp);
5971				    lck_mtx_unlock(nfs_buf_mutex);
5972				    continue;
5973				}
5974			}
5975			nfs_buf_remfree(bp);
5976			lck_mtx_unlock(nfs_buf_mutex);
5977			SET(bp->nb_flags, NB_INVAL);
5978			nfs_lock(np, NFS_NODE_LOCK_FORCE);
5979			if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
5980				CLR(bp->nb_flags, NB_NEEDCOMMIT);
5981				np->n_needcommitcnt--;
5982				CHECK_NEEDCOMMITCNT(np);
5983			}
5984			nfs_unlock(np);
5985			nfs_buf_release(bp, 1);
5986		} else {
5987			lck_mtx_unlock(nfs_buf_mutex);
5988		}
5989	}
5990
5991	thd = vfs_context_thread(ap->a_context);
5992	cred = ubc_getcred(vp);
5993	if (!IS_VALID_CRED(cred))
5994		cred = vfs_context_ucred(ap->a_context);
5995
5996	nfs_lock(np, NFS_NODE_LOCK_FORCE);
5997	if (np->n_flag & NWRITEERR) {
5998		error = np->n_error;
5999		nfs_unlock(np);
6000		nfs_data_unlock2(np, 0);
6001		if (!nofreeupl)
6002			ubc_upl_abort_range(pl, pl_offset, size,
6003					    UPL_ABORT_FREE_ON_EMPTY);
6004		return (error);
6005	}
6006	nfs_unlock(np);
6007
6008	if (f_offset < 0 || f_offset >= (off_t)np->n_size ||
6009	    f_offset & PAGE_MASK_64 || size & PAGE_MASK_64) {
6010		nfs_data_unlock2(np, 0);
6011		if (!nofreeupl)
6012			ubc_upl_abort_range(pl, pl_offset, size,
6013					    UPL_ABORT_FREE_ON_EMPTY);
6014		return (EINVAL);
6015	}
6016
6017	ubc_upl_map(pl, &ioaddr);
6018	ioaddr += pl_offset;
6019
6020	if ((u_quad_t)f_offset + size > np->n_size)
6021		xsize = np->n_size - f_offset;
6022	else
6023		xsize = size;
6024
6025	pgsize = round_page_64(xsize);
6026	if ((size > pgsize) && !nofreeupl)
6027		ubc_upl_abort_range(pl, pl_offset + pgsize, size - pgsize,
6028				    UPL_ABORT_FREE_ON_EMPTY);
6029
6030	/*
6031	 * check for partial page and clear the
6032	 * contents past end of the file before
6033	 * releasing it in the VM page cache
6034	 */
6035	if ((u_quad_t)f_offset < np->n_size && (u_quad_t)f_offset + size > np->n_size) {
6036		size_t io = np->n_size - f_offset;
6037		bzero((caddr_t)(ioaddr + io), size - io);
6038		FSDBG(321, np->n_size, f_offset, f_offset + io, size - io);
6039	}
6040	nfs_data_unlock2(np, 0);
6041
6042#if 1   /* LP64todo - can't use new segment flags until the drivers are ready */
6043	auio.uio_segflg = UIO_SYSSPACE;
6044#else
6045	auio.uio_segflg = UIO_SYSSPACE32;
6046#endif
6047	auio.uio_rw = UIO_WRITE;
6048	auio.uio_procp = vfs_context_proc(ap->a_context);
6049
6050tryagain:
6051	wverf = wverf2 = wverfset = 0;
6052	txsize = rxsize = xsize;
6053	txoffset = rxoffset = f_offset;
6054	txaddr = rxaddr = ioaddr;
6055	commit = NFS_WRITE_FILESYNC;
6056
6057	bzero(req, sizeof(req));
6058	nextsend = nextwait = 0;
6059	do {
6060		/* send requests while we need to and have available slots */
6061		while ((txsize > 0) && (req[nextsend] == NULL)) {
6062			iosize = MIN(nmwsize, txsize);
6063			aiov.iov_len = iosize;
6064			aiov.iov_base = (uintptr_t)txaddr;
6065			auio.uio_iovs.iov32p = &aiov;
6066			auio.uio_iovcnt = 1;
6067			auio.uio_offset = txoffset;
6068			uio_uio_resid_set(&auio, iosize);
6069			FSDBG(323, auio.uio_offset, iosize, txaddr, txsize);
6070			OSAddAtomic(1, (SInt32*)&nfsstats.pageouts);
6071			vnode_startwrite(vp);
6072			iomode = NFS_WRITE_UNSTABLE;
6073			if ((error = nmp->nm_funcs->nf_write_rpc_async(np, &auio, iosize, thd, cred, iomode, NULL, &req[nextsend]))) {
6074				req[nextsend] = NULL;
6075				vnode_writedone(vp);
6076				break;
6077			}
6078			txaddr += iosize;
6079			txoffset += iosize;
6080			txsize -= iosize;
6081			nextsend = (nextsend + 1) % MAXPAGINGREQS;
6082		}
6083		/* wait while we need to and break out if more requests to send */
6084		while ((rxsize > 0) && req[nextwait]) {
6085			iosize = remsize = MIN(nmwsize, rxsize);
6086			error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req[nextwait], &iomode, &iosize, &wverf2);
6087			req[nextwait] = NULL;
6088			nextwait = (nextwait + 1) % MAXPAGINGREQS;
6089			vnode_writedone(vp);
6090			if (error) {
6091				FSDBG(323, rxoffset, rxsize, error, -1);
6092				break;
6093			}
6094			if (!wverfset) {
6095				wverf = wverf2;
6096				wverfset = 1;
6097			} else if (wverf != wverf2) {
6098				/* verifier changed, so we need to restart all the writes */
6099				restart++;
6100				goto cancel;
6101			}
6102			/* Retain the lowest commitment level returned. */
6103			if (iomode < commit)
6104				commit = iomode;
6105			rxaddr += iosize;
6106			rxoffset += iosize;
6107			rxsize -= iosize;
6108			remsize -= iosize;
6109			if (remsize > 0) {
6110				/* need to try sending the remainder */
6111				iosize = remsize;
6112				aiov.iov_len = remsize;
6113				aiov.iov_base = (uintptr_t)rxaddr;
6114				auio.uio_iovs.iov32p = &aiov;
6115				auio.uio_iovcnt = 1;
6116				auio.uio_offset = rxoffset;
6117				uio_uio_resid_set(&auio, remsize);
6118				iomode = NFS_WRITE_UNSTABLE;
6119				error = nfs_write_rpc2(np, &auio, thd, cred, &iomode, &wverf2);
6120				if (error) {
6121					FSDBG(323, rxoffset, rxsize, error, -1);
6122					break;
6123				}
6124				if (wverf != wverf2) {
6125					/* verifier changed, so we need to restart all the writes */
6126					restart++;
6127					goto cancel;
6128				}
6129				if (iomode < commit)
6130					commit = iomode;
6131				rxaddr += iosize;
6132				rxoffset += iosize;
6133				rxsize -= iosize;
6134			}
6135			if (txsize)
6136				break;
6137		}
6138	} while (!error && (txsize || rxsize));
6139
6140	restart = 0;
6141
6142	if (!error && (commit != NFS_WRITE_FILESYNC)) {
6143		error = nmp->nm_funcs->nf_commit_rpc(np, f_offset, xsize, cred);
6144		if (error == NFSERR_STALEWRITEVERF) {
6145			restart++;
6146			error = EIO;
6147		}
6148	}
6149
6150	if (error) {
6151cancel:
6152		/* cancel any outstanding requests */
6153		while (req[nextwait]) {
6154			nfs_request_async_cancel(req[nextwait]);
6155			req[nextwait] = NULL;
6156			nextwait = (nextwait + 1) % MAXPAGINGREQS;
6157			vnode_writedone(vp);
6158		}
6159		if (restart) {
6160			if (restart <= 10)
6161				goto tryagain;
6162			printf("nfs_pageout: too many restarts, aborting.\n");
6163			FSDBG(323, f_offset, xsize, ERESTART, -1);
6164		}
6165	}
6166
6167	ubc_upl_unmap(pl);
6168
6169	/*
6170	 * We've had several different solutions on what to do when the pageout
6171	 * gets an error. If we don't handle it, and return an error to the
6172	 * caller, vm, it will retry . This can end in endless looping
6173	 * between vm and here doing retries of the same page. Doing a dump
6174	 * back to vm, will get it out of vm's knowledge and we lose whatever
6175	 * data existed. This is risky, but in some cases necessary. For
6176	 * example, the initial fix here was to do that for ESTALE. In that case
6177	 * the server is telling us that the file is no longer the same. We
6178	 * would not want to keep paging out to that. We also saw some 151
6179	 * errors from Auspex server and NFSv3 can return errors higher than
6180	 * ELAST. Those along with NFS known server errors we will "dump" from
6181	 * vm.  Errors we don't expect to occur, we dump and log for further
6182	 * analysis. Errors that could be transient, networking ones,
6183	 * we let vm "retry". Lastly, errors that we retry, but may have potential
6184	 * to storm the network, we "retrywithsleep". "sever" will be used in
6185	 * in the future to dump all pages of object for cases like ESTALE.
6186	 * All this is the basis for the states returned and first guesses on
6187	 * error handling. Tweaking expected as more statistics are gathered.
6188	 * Note, in the long run we may need another more robust solution to
6189	 * have some kind of persistant store when the vm cannot dump nor keep
6190	 * retrying as a solution, but this would be a file architectural change
6191	 */
6192	if (!nofreeupl) { /* otherwise stacked file system has to handle this */
6193		if (error) {
6194			int abortflags = 0;
6195			char action = nfs_pageouterrorhandler(error);
6196
6197			switch (action) {
6198				case DUMP:
6199					abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
6200					break;
6201				case DUMPANDLOG:
6202					abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY;
6203					if (error <= NFS_ELAST) {
6204						if ((errorcount[error] % 100) == 0)
6205							printf("nfs_pageout: unexpected error %d. dumping vm page\n", error);
6206						errorcount[error]++;
6207					}
6208					break;
6209				case RETRY:
6210					abortflags = UPL_ABORT_FREE_ON_EMPTY;
6211					break;
6212				case RETRYWITHSLEEP:
6213					abortflags = UPL_ABORT_FREE_ON_EMPTY;
6214					/* pri unused. PSOCK for placeholder. */
6215					tsleep(&lbolt, PSOCK, "nfspageout", 0);
6216					break;
6217				case SEVER: /* not implemented */
6218				default:
6219					printf("nfs_pageout: action %d not expected\n", action);
6220					break;
6221			}
6222
6223			ubc_upl_abort_range(pl, pl_offset, pgsize, abortflags);
6224			/* return error in all cases above */
6225
6226		} else {
6227			ubc_upl_commit_range(pl, pl_offset, pgsize,
6228					     UPL_COMMIT_CLEAR_DIRTY |
6229					     UPL_COMMIT_FREE_ON_EMPTY);
6230		}
6231	}
6232	return (error);
6233}
6234
6235/* Blktooff derives file offset given a logical block number */
6236static int
6237nfs_vnop_blktooff(
6238	struct vnop_blktooff_args /* {
6239		struct vnodeop_desc *a_desc;
6240		vnode_t a_vp;
6241		daddr64_t a_lblkno;
6242		off_t *a_offset;
6243	} */ *ap)
6244{
6245	int biosize;
6246	vnode_t vp = ap->a_vp;
6247	struct nfsmount *nmp = VTONMP(vp);
6248
6249	if (!nmp)
6250		return (ENXIO);
6251	biosize = nmp->nm_biosize;
6252
6253	*ap->a_offset = (off_t)(ap->a_lblkno * biosize);
6254
6255	return (0);
6256}
6257
6258static int
6259nfs_vnop_offtoblk(
6260	struct vnop_offtoblk_args /* {
6261		struct vnodeop_desc *a_desc;
6262		vnode_t a_vp;
6263		off_t a_offset;
6264		daddr64_t *a_lblkno;
6265	} */ *ap)
6266{
6267	int biosize;
6268	vnode_t vp = ap->a_vp;
6269	struct nfsmount *nmp = VTONMP(vp);
6270
6271	if (!nmp)
6272		return (ENXIO);
6273	biosize = nmp->nm_biosize;
6274
6275	*ap->a_lblkno = (daddr64_t)(ap->a_offset / biosize);
6276
6277	return (0);
6278}
6279
6280