Deleted Added
full compact
ffs_vfsops.c (212788) ffs_vfsops.c (213664)
1/*-
2 * Copyright (c) 1989, 1991, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 1989, 1991, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 212788 2010-09-17 09:14:40Z obrien $");
33__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vfsops.c 213664 2010-10-10 07:05:47Z kib $");
34
35#include "opt_quota.h"
36#include "opt_ufs.h"
37#include "opt_ffs.h"
38#include "opt_ddb.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/namei.h>
43#include <sys/priv.h>
44#include <sys/proc.h>
45#include <sys/kernel.h>
46#include <sys/vnode.h>
47#include <sys/mount.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/conf.h>
51#include <sys/fcntl.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54
55#include <security/mac/mac_framework.h>
56
57#include <ufs/ufs/extattr.h>
58#include <ufs/ufs/gjournal.h>
59#include <ufs/ufs/quota.h>
60#include <ufs/ufs/ufsmount.h>
61#include <ufs/ufs/inode.h>
62#include <ufs/ufs/ufs_extern.h>
63
64#include <ufs/ffs/fs.h>
65#include <ufs/ffs/ffs_extern.h>
66
67#include <vm/vm.h>
68#include <vm/uma.h>
69#include <vm/vm_page.h>
70
71#include <geom/geom.h>
72#include <geom/geom_vfs.h>
73
74#include <ddb/ddb.h>
75
/*
 * NOTE(review): this file is a rendered diff listing; every line carries an
 * embedded source line number from the diff tool and is not compilable as-is.
 */
/* UMA zones for in-core inodes and on-disk dinodes (created on first mount). */
 76static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
 77
 78static int ffs_reload(struct mount *, struct thread *);
 79static int ffs_mountfs(struct vnode *, struct mount *, struct thread *);
 80static void ffs_oldfscompat_read(struct fs *, struct ufsmount *,
 81 ufs2_daddr_t);
 82static void ffs_ifree(struct ufsmount *ump, struct inode *ip);
 83static vfs_init_t ffs_init;
 84static vfs_uninit_t ffs_uninit;
 85static vfs_extattrctl_t ffs_extattrctl;
 86static vfs_cmount_t ffs_cmount;
 87static vfs_unmount_t ffs_unmount;
 88static vfs_mount_t ffs_mount;
 89static vfs_statfs_t ffs_statfs;
 90static vfs_fhtovp_t ffs_fhtovp;
 91static vfs_sync_t ffs_sync;
 92
/* VFS operations vector for UFS/FFS, registered below via VFS_SET(). */
 93static struct vfsops ufs_vfsops = {
 94 .vfs_extattrctl = ffs_extattrctl,
 95 .vfs_fhtovp = ffs_fhtovp,
 96 .vfs_init = ffs_init,
 97 .vfs_mount = ffs_mount,
 98 .vfs_cmount = ffs_cmount,
 99 .vfs_quotactl = ufs_quotactl,
 100 .vfs_root = ufs_root,
 101 .vfs_statfs = ffs_statfs,
 102 .vfs_sync = ffs_sync,
 103 .vfs_uninit = ffs_uninit,
 104 .vfs_unmount = ffs_unmount,
 105 .vfs_vget = ffs_vget,
 106 .vfs_susp_clean = process_deferred_inactive,
 107};
 108
 109VFS_SET(ufs_vfsops, ufs, 0);
 110MODULE_VERSION(ufs, 1);
 111
 112static b_strategy_t ffs_geom_strategy;
 113static b_write_t ffs_bufwrite;
 114
/*
 * Buffer ops installed on the device vnode's bufobj in ffs_mountfs();
 * snapshot-aware bdflush is used unless built with NO_FFS_SNAPSHOT.
 */
 115static struct buf_ops ffs_ops = {
 116 .bop_name = "FFS",
 117 .bop_write = ffs_bufwrite,
 118 .bop_strategy = ffs_geom_strategy,
 119 .bop_sync = bufsync,
 120#ifdef NO_FFS_SNAPSHOT
 121 .bop_bdflush = bufbdflush,
 122#else
 123 .bop_bdflush = ffs_bdflush,
 124#endif
 125};
 126
 127/*
 128 * Note that userquota and groupquota options are not currently used
 129 * by UFS/FFS code and generally mount(8) does not pass those options
 130 * from userland, but they can be passed by loader(8) via
 131 * vfs.root.mountfrom.options.
 132 */
/* NULL-terminated whitelist of mount options accepted by ffs_mount(). */
 133static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
 134 "noclusterw", "noexec", "export", "force", "from", "groupquota",
 135 "multilabel", "nfsv4acls", "snapshot", "nosuid", "suiddir", "nosymfollow",
 136 "sync", "union", "userquota", NULL };
137
/*
 * ffs_mount(): VFS_MOUNT entry point for FFS.
 *
 * Handles option filtering/parsing, MNT_UPDATE transitions (rw->ro
 * downgrade via filesystem suspension, ro->rw upgrade with clean-flag
 * checks), snapshot creation requests, and new mounts via ffs_mountfs().
 * Returns 0 on success or an errno value.
 */
 138static int
 139ffs_mount(struct mount *mp)
 140{
 141 struct vnode *devvp;
 142 struct thread *td;
 143 struct ufsmount *ump = 0;
 144 struct fs *fs;
 145 int error, flags;
 146 u_int mntorflags;
 147 accmode_t accmode;
 148 struct nameidata ndp;
 149 char *fspec;
 150
 151 td = curthread;
 152 if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
 153 return (EINVAL);
/* First mount in the system: create the UMA zones for inodes/dinodes. */
 154 if (uma_inode == NULL) {
 155 uma_inode = uma_zcreate("FFS inode",
 156 sizeof(struct inode), NULL, NULL, NULL, NULL,
 157 UMA_ALIGN_PTR, 0);
 158 uma_ufs1 = uma_zcreate("FFS1 dinode",
 159 sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
 160 UMA_ALIGN_PTR, 0);
 161 uma_ufs2 = uma_zcreate("FFS2 dinode",
 162 sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
 163 UMA_ALIGN_PTR, 0);
 164 }
 165
 166 vfs_deleteopt(mp->mnt_optnew, "groupquota");
 167 vfs_deleteopt(mp->mnt_optnew, "userquota");
 168
 169 fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
 170 if (error)
 171 return (error);
 172
 173 mntorflags = 0;
 174 if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
 175 mntorflags |= MNT_ACLS;
 176
 177 if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
 178 mntorflags |= MNT_SNAPSHOT;
 179 /*
 180 * Once we have set the MNT_SNAPSHOT flag, do not
 181 * persist "snapshot" in the options list.
 182 */
 183 vfs_deleteopt(mp->mnt_optnew, "snapshot");
 184 vfs_deleteopt(mp->mnt_opt, "snapshot");
 185 }
 186
 187 if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
 188 if (mntorflags & MNT_ACLS) {
 189 printf("WARNING: \"acls\" and \"nfsv4acls\" "
 190 "options are mutually exclusive\n");
 191 return (EINVAL);
 192 }
 193 mntorflags |= MNT_NFS4ACLS;
 194 }
 195
 196 MNT_ILOCK(mp);
 197 mp->mnt_flag |= mntorflags;
 198 MNT_IUNLOCK(mp);
 199 /*
 200 * If updating, check whether changing from read-only to
 201 * read/write; if there is no device name, that's all we do.
 202 */
 203 if (mp->mnt_flag & MNT_UPDATE) {
 204 ump = VFSTOUFS(mp);
 205 fs = ump->um_fs;
 206 devvp = ump->um_devvp;
 207 if (fs->fs_ronly == 0 &&
 208 vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 209 /*
 210 * Flush any dirty data and suspend filesystem.
 211 */
 212 if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 213 return (error);
/* Retry until vfs_write_suspend() leaves the fs in MNTK_SUSPENDED state. */
 214 for (;;) {
 215 vn_finished_write(mp);
 216 if ((error = vfs_write_suspend(mp)) != 0)
 217 return (error);
 218 MNT_ILOCK(mp);
 219 if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
 220 /*
 221 * Allow the secondary writes
 222 * to proceed.
 223 */
 224 mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
 225 MNTK_SUSPEND2);
 226 wakeup(&mp->mnt_flag);
 227 MNT_IUNLOCK(mp);
 228 /*
 229 * Allow the curthread to
 230 * ignore the suspension to
 231 * synchronize on-disk state.
 232 */
 233 td->td_pflags |= TDP_IGNSUSP;
 234 break;
 235 }
 236 MNT_IUNLOCK(mp);
 237 vn_start_write(NULL, &mp, V_WAIT);
 238 }
 239 /*
 240 * Check for and optionally get rid of files open
 241 * for writing.
 242 */
 243 flags = WRITECLOSE;
 244 if (mp->mnt_flag & MNT_FORCE)
 245 flags |= FORCECLOSE;
 246 if (mp->mnt_flag & MNT_SOFTDEP) {
 247 error = softdep_flushfiles(mp, flags, td);
 248 } else {
 249 error = ffs_flushfiles(mp, flags, td);
 250 }
 251 if (error) {
 252 vfs_write_resume(mp);
 253 return (error);
 254 }
 255 if (fs->fs_pendingblocks != 0 ||
 256 fs->fs_pendinginodes != 0) {
 257 printf("%s: %s: blocks %jd files %d\n",
 258 fs->fs_fsmnt, "update error",
 259 (intmax_t)fs->fs_pendingblocks,
 260 fs->fs_pendinginodes);
 261 fs->fs_pendingblocks = 0;
 262 fs->fs_pendinginodes = 0;
 263 }
 264 if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
 265 fs->fs_clean = 1;
 266 if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 267 fs->fs_ronly = 0;
 268 fs->fs_clean = 0;
 269 vfs_write_resume(mp);
 270 return (error);
 271 }
/* Drop our GEOM write reference (g_access w count -1); device is now ro. */
 272 DROP_GIANT();
 273 g_topology_lock();
 274 g_access(ump->um_cp, 0, -1, 0);
 275 g_topology_unlock();
 276 PICKUP_GIANT();
 277 fs->fs_ronly = 1;
 278 MNT_ILOCK(mp);
 279 mp->mnt_flag |= MNT_RDONLY;
 280 MNT_IUNLOCK(mp);
 281 /*
 282 * Allow the writers to note that filesystem
 283 * is ro now.
 284 */
 285 vfs_write_resume(mp);
 286 }
 287 if ((mp->mnt_flag & MNT_RELOAD) &&
 288 (error = ffs_reload(mp, td)) != 0)
 289 return (error);
 290 if (fs->fs_ronly &&
 291 !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 292 /*
 293 * If upgrade to read-write by non-root, then verify
 294 * that user has necessary permissions on the device.
 295 */
 296 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 297 error = VOP_ACCESS(devvp, VREAD | VWRITE,
 298 td->td_ucred, td);
 299 if (error)
 300 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 301 if (error) {
 302 VOP_UNLOCK(devvp, 0);
 303 return (error);
 304 }
 305 VOP_UNLOCK(devvp, 0);
/* Refuse rw upgrade of an unclean fs unless forced or softdep-safe. */
 306 fs->fs_flags &= ~FS_UNCLEAN;
 307 if (fs->fs_clean == 0) {
 308 fs->fs_flags |= FS_UNCLEAN;
 309 if ((mp->mnt_flag & MNT_FORCE) ||
 310 ((fs->fs_flags &
 311 (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
 312 (fs->fs_flags & FS_DOSOFTDEP))) {
 313 printf("WARNING: %s was not %s\n",
 314 fs->fs_fsmnt, "properly dismounted");
 315 } else {
 316 printf(
 317"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
 318 fs->fs_fsmnt);
 319 if (fs->fs_flags & FS_SUJ)
 320 printf(
 321"WARNING: Forced mount will invalidate journal contents\n");
 322 return (EPERM);
 323 }
 324 }
 325 DROP_GIANT();
 326 g_topology_lock();
 327 /*
 328 * If we're the root device, we may not have an E count
 329 * yet, get it now.
 330 */
 331 if (ump->um_cp->ace == 0)
 332 error = g_access(ump->um_cp, 0, 1, 1);
 333 else
 334 error = g_access(ump->um_cp, 0, 1, 0);
 335 g_topology_unlock();
 336 PICKUP_GIANT();
 337 if (error)
 338 return (error);
 339 if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 340 return (error);
 341 fs->fs_ronly = 0;
 342 MNT_ILOCK(mp);
 343 mp->mnt_flag &= ~MNT_RDONLY;
 344 MNT_IUNLOCK(mp);
 345 fs->fs_mtime = time_second;
 346 /* check to see if we need to start softdep */
 347 if ((fs->fs_flags & FS_DOSOFTDEP) &&
 348 (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
 349 vn_finished_write(mp);
 350 return (error);
 351 }
 352 fs->fs_clean = 0;
 353 if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 354 vn_finished_write(mp);
 355 return (error);
 356 }
 357 if (fs->fs_snapinum[0] != 0)
 358 ffs_snapshot_mount(mp);
 359 vn_finished_write(mp);
 360 }
 361 /*
 362 * Soft updates is incompatible with "async",
 363 * so if we are doing softupdates stop the user
 364 * from setting the async flag in an update.
 365 * Softdep_mount() clears it in an initial mount
 366 * or ro->rw remount.
 367 */
 368 if (mp->mnt_flag & MNT_SOFTDEP) {
 369 /* XXX: Reset too late ? */
 370 MNT_ILOCK(mp);
 371 mp->mnt_flag &= ~MNT_ASYNC;
 372 MNT_IUNLOCK(mp);
 373 }
 374 /*
 375 * Keep MNT_ACLS flag if it is stored in superblock.
 376 */
 377 if ((fs->fs_flags & FS_ACLS) != 0) {
 378 /* XXX: Set too late ? */
 379 MNT_ILOCK(mp);
 380 mp->mnt_flag |= MNT_ACLS;
 381 MNT_IUNLOCK(mp);
 382 }
 383
 384 if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
 385 /* XXX: Set too late ? */
 386 MNT_ILOCK(mp);
 387 mp->mnt_flag |= MNT_NFS4ACLS;
 388 MNT_IUNLOCK(mp);
 389 }
 390
 391 /*
 392 * If this is a snapshot request, take the snapshot.
 393 */
 394 if (mp->mnt_flag & MNT_SNAPSHOT)
 395 return (ffs_snapshot(mp, fspec));
 396 }
 397
 398 /*
 399 * Not an update, or updating the name: look up the name
 400 * and verify that it refers to a sensible disk device.
 401 */
 402 NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
 403 if ((error = namei(&ndp)) != 0)
 404 return (error);
 405 NDFREE(&ndp, NDF_ONLY_PNBUF);
 406 devvp = ndp.ni_vp;
 407 if (!vn_isdisk(devvp, &error)) {
 408 vput(devvp);
 409 return (error);
 410 }
 411
 412 /*
 413 * If mount by non-root, then verify that user has necessary
 414 * permissions on the device.
 415 */
 416 accmode = VREAD;
 417 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 418 accmode |= VWRITE;
 419 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
 420 if (error)
 421 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 422 if (error) {
 423 vput(devvp);
 424 return (error);
 425 }
 426
 427 if (mp->mnt_flag & MNT_UPDATE) {
 428 /*
 429 * Update only
 430 *
 431 * If it's not the same vnode, or at least the same device
 432 * then it's not correct.
 433 */
 434
 435 if (devvp->v_rdev != ump->um_devvp->v_rdev)
 436 error = EINVAL; /* needs translation */
 437 vput(devvp);
 438 if (error)
 439 return (error);
 440 } else {
 441 /*
 442 * New mount
 443 *
 444 * We need the name for the mount point (also used for
 445 * "last mounted on") copied in. If an error occurs,
 446 * the mount point is discarded by the upper level code.
 447 * Note that vfs_mount() populates f_mntonname for us.
 448 */
 449 if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
 450 vrele(devvp);
 451 return (error);
 452 }
 453 }
 454 vfs_mountedfrom(mp, fspec);
 455 return (0);
 456}
457
458/*
459 * Compatibility with old mount system call.
460 */
461
462static int
463ffs_cmount(struct mntarg *ma, void *data, int flags)
464{
465 struct ufs_args args;
34
35#include "opt_quota.h"
36#include "opt_ufs.h"
37#include "opt_ffs.h"
38#include "opt_ddb.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/namei.h>
43#include <sys/priv.h>
44#include <sys/proc.h>
45#include <sys/kernel.h>
46#include <sys/vnode.h>
47#include <sys/mount.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/conf.h>
51#include <sys/fcntl.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54
55#include <security/mac/mac_framework.h>
56
57#include <ufs/ufs/extattr.h>
58#include <ufs/ufs/gjournal.h>
59#include <ufs/ufs/quota.h>
60#include <ufs/ufs/ufsmount.h>
61#include <ufs/ufs/inode.h>
62#include <ufs/ufs/ufs_extern.h>
63
64#include <ufs/ffs/fs.h>
65#include <ufs/ffs/ffs_extern.h>
66
67#include <vm/vm.h>
68#include <vm/uma.h>
69#include <vm/vm_page.h>
70
71#include <geom/geom.h>
72#include <geom/geom_vfs.h>
73
74#include <ddb/ddb.h>
75
/*
 * NOTE(review): second copy of the declarations (post-change side of the
 * rendered diff); lines carry embedded numbers from the diff tool.
 */
/* UMA zones for in-core inodes and on-disk dinodes (created on first mount). */
 76static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
 77
 78static int ffs_reload(struct mount *, struct thread *);
 79static int ffs_mountfs(struct vnode *, struct mount *, struct thread *);
 80static void ffs_oldfscompat_read(struct fs *, struct ufsmount *,
 81 ufs2_daddr_t);
 82static void ffs_ifree(struct ufsmount *ump, struct inode *ip);
 83static vfs_init_t ffs_init;
 84static vfs_uninit_t ffs_uninit;
 85static vfs_extattrctl_t ffs_extattrctl;
 86static vfs_cmount_t ffs_cmount;
 87static vfs_unmount_t ffs_unmount;
 88static vfs_mount_t ffs_mount;
 89static vfs_statfs_t ffs_statfs;
 90static vfs_fhtovp_t ffs_fhtovp;
 91static vfs_sync_t ffs_sync;
 92
/* VFS operations vector for UFS/FFS, registered below via VFS_SET(). */
 93static struct vfsops ufs_vfsops = {
 94 .vfs_extattrctl = ffs_extattrctl,
 95 .vfs_fhtovp = ffs_fhtovp,
 96 .vfs_init = ffs_init,
 97 .vfs_mount = ffs_mount,
 98 .vfs_cmount = ffs_cmount,
 99 .vfs_quotactl = ufs_quotactl,
 100 .vfs_root = ufs_root,
 101 .vfs_statfs = ffs_statfs,
 102 .vfs_sync = ffs_sync,
 103 .vfs_uninit = ffs_uninit,
 104 .vfs_unmount = ffs_unmount,
 105 .vfs_vget = ffs_vget,
 106 .vfs_susp_clean = process_deferred_inactive,
 107};
 108
 109VFS_SET(ufs_vfsops, ufs, 0);
 110MODULE_VERSION(ufs, 1);
 111
 112static b_strategy_t ffs_geom_strategy;
 113static b_write_t ffs_bufwrite;
 114
/*
 * Buffer ops installed on the device vnode's bufobj in ffs_mountfs();
 * snapshot-aware bdflush is used unless built with NO_FFS_SNAPSHOT.
 */
 115static struct buf_ops ffs_ops = {
 116 .bop_name = "FFS",
 117 .bop_write = ffs_bufwrite,
 118 .bop_strategy = ffs_geom_strategy,
 119 .bop_sync = bufsync,
 120#ifdef NO_FFS_SNAPSHOT
 121 .bop_bdflush = bufbdflush,
 122#else
 123 .bop_bdflush = ffs_bdflush,
 124#endif
 125};
 126
 127/*
 128 * Note that userquota and groupquota options are not currently used
 129 * by UFS/FFS code and generally mount(8) does not pass those options
 130 * from userland, but they can be passed by loader(8) via
 131 * vfs.root.mountfrom.options.
 132 */
/* NULL-terminated whitelist of mount options accepted by ffs_mount(). */
 133static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
 134 "noclusterw", "noexec", "export", "force", "from", "groupquota",
 135 "multilabel", "nfsv4acls", "snapshot", "nosuid", "suiddir", "nosymfollow",
 136 "sync", "union", "userquota", NULL };
137
/*
 * ffs_mount(): VFS_MOUNT entry point for FFS (post-change copy of the
 * rendered diff; unchanged between the two revisions shown).
 *
 * Handles option filtering/parsing, MNT_UPDATE transitions (rw->ro
 * downgrade via filesystem suspension, ro->rw upgrade with clean-flag
 * checks), snapshot creation requests, and new mounts via ffs_mountfs().
 * Returns 0 on success or an errno value.
 */
 138static int
 139ffs_mount(struct mount *mp)
 140{
 141 struct vnode *devvp;
 142 struct thread *td;
 143 struct ufsmount *ump = 0;
 144 struct fs *fs;
 145 int error, flags;
 146 u_int mntorflags;
 147 accmode_t accmode;
 148 struct nameidata ndp;
 149 char *fspec;
 150
 151 td = curthread;
 152 if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
 153 return (EINVAL);
/* First mount in the system: create the UMA zones for inodes/dinodes. */
 154 if (uma_inode == NULL) {
 155 uma_inode = uma_zcreate("FFS inode",
 156 sizeof(struct inode), NULL, NULL, NULL, NULL,
 157 UMA_ALIGN_PTR, 0);
 158 uma_ufs1 = uma_zcreate("FFS1 dinode",
 159 sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
 160 UMA_ALIGN_PTR, 0);
 161 uma_ufs2 = uma_zcreate("FFS2 dinode",
 162 sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
 163 UMA_ALIGN_PTR, 0);
 164 }
 165
 166 vfs_deleteopt(mp->mnt_optnew, "groupquota");
 167 vfs_deleteopt(mp->mnt_optnew, "userquota");
 168
 169 fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
 170 if (error)
 171 return (error);
 172
 173 mntorflags = 0;
 174 if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
 175 mntorflags |= MNT_ACLS;
 176
 177 if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
 178 mntorflags |= MNT_SNAPSHOT;
 179 /*
 180 * Once we have set the MNT_SNAPSHOT flag, do not
 181 * persist "snapshot" in the options list.
 182 */
 183 vfs_deleteopt(mp->mnt_optnew, "snapshot");
 184 vfs_deleteopt(mp->mnt_opt, "snapshot");
 185 }
 186
 187 if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
 188 if (mntorflags & MNT_ACLS) {
 189 printf("WARNING: \"acls\" and \"nfsv4acls\" "
 190 "options are mutually exclusive\n");
 191 return (EINVAL);
 192 }
 193 mntorflags |= MNT_NFS4ACLS;
 194 }
 195
 196 MNT_ILOCK(mp);
 197 mp->mnt_flag |= mntorflags;
 198 MNT_IUNLOCK(mp);
 199 /*
 200 * If updating, check whether changing from read-only to
 201 * read/write; if there is no device name, that's all we do.
 202 */
 203 if (mp->mnt_flag & MNT_UPDATE) {
 204 ump = VFSTOUFS(mp);
 205 fs = ump->um_fs;
 206 devvp = ump->um_devvp;
 207 if (fs->fs_ronly == 0 &&
 208 vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 209 /*
 210 * Flush any dirty data and suspend filesystem.
 211 */
 212 if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 213 return (error);
/* Retry until vfs_write_suspend() leaves the fs in MNTK_SUSPENDED state. */
 214 for (;;) {
 215 vn_finished_write(mp);
 216 if ((error = vfs_write_suspend(mp)) != 0)
 217 return (error);
 218 MNT_ILOCK(mp);
 219 if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
 220 /*
 221 * Allow the secondary writes
 222 * to proceed.
 223 */
 224 mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
 225 MNTK_SUSPEND2);
 226 wakeup(&mp->mnt_flag);
 227 MNT_IUNLOCK(mp);
 228 /*
 229 * Allow the curthread to
 230 * ignore the suspension to
 231 * synchronize on-disk state.
 232 */
 233 td->td_pflags |= TDP_IGNSUSP;
 234 break;
 235 }
 236 MNT_IUNLOCK(mp);
 237 vn_start_write(NULL, &mp, V_WAIT);
 238 }
 239 /*
 240 * Check for and optionally get rid of files open
 241 * for writing.
 242 */
 243 flags = WRITECLOSE;
 244 if (mp->mnt_flag & MNT_FORCE)
 245 flags |= FORCECLOSE;
 246 if (mp->mnt_flag & MNT_SOFTDEP) {
 247 error = softdep_flushfiles(mp, flags, td);
 248 } else {
 249 error = ffs_flushfiles(mp, flags, td);
 250 }
 251 if (error) {
 252 vfs_write_resume(mp);
 253 return (error);
 254 }
 255 if (fs->fs_pendingblocks != 0 ||
 256 fs->fs_pendinginodes != 0) {
 257 printf("%s: %s: blocks %jd files %d\n",
 258 fs->fs_fsmnt, "update error",
 259 (intmax_t)fs->fs_pendingblocks,
 260 fs->fs_pendinginodes);
 261 fs->fs_pendingblocks = 0;
 262 fs->fs_pendinginodes = 0;
 263 }
 264 if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
 265 fs->fs_clean = 1;
 266 if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 267 fs->fs_ronly = 0;
 268 fs->fs_clean = 0;
 269 vfs_write_resume(mp);
 270 return (error);
 271 }
/* Drop our GEOM write reference (g_access w count -1); device is now ro. */
 272 DROP_GIANT();
 273 g_topology_lock();
 274 g_access(ump->um_cp, 0, -1, 0);
 275 g_topology_unlock();
 276 PICKUP_GIANT();
 277 fs->fs_ronly = 1;
 278 MNT_ILOCK(mp);
 279 mp->mnt_flag |= MNT_RDONLY;
 280 MNT_IUNLOCK(mp);
 281 /*
 282 * Allow the writers to note that filesystem
 283 * is ro now.
 284 */
 285 vfs_write_resume(mp);
 286 }
 287 if ((mp->mnt_flag & MNT_RELOAD) &&
 288 (error = ffs_reload(mp, td)) != 0)
 289 return (error);
 290 if (fs->fs_ronly &&
 291 !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 292 /*
 293 * If upgrade to read-write by non-root, then verify
 294 * that user has necessary permissions on the device.
 295 */
 296 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 297 error = VOP_ACCESS(devvp, VREAD | VWRITE,
 298 td->td_ucred, td);
 299 if (error)
 300 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 301 if (error) {
 302 VOP_UNLOCK(devvp, 0);
 303 return (error);
 304 }
 305 VOP_UNLOCK(devvp, 0);
/* Refuse rw upgrade of an unclean fs unless forced or softdep-safe. */
 306 fs->fs_flags &= ~FS_UNCLEAN;
 307 if (fs->fs_clean == 0) {
 308 fs->fs_flags |= FS_UNCLEAN;
 309 if ((mp->mnt_flag & MNT_FORCE) ||
 310 ((fs->fs_flags &
 311 (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
 312 (fs->fs_flags & FS_DOSOFTDEP))) {
 313 printf("WARNING: %s was not %s\n",
 314 fs->fs_fsmnt, "properly dismounted");
 315 } else {
 316 printf(
 317"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
 318 fs->fs_fsmnt);
 319 if (fs->fs_flags & FS_SUJ)
 320 printf(
 321"WARNING: Forced mount will invalidate journal contents\n");
 322 return (EPERM);
 323 }
 324 }
 325 DROP_GIANT();
 326 g_topology_lock();
 327 /*
 328 * If we're the root device, we may not have an E count
 329 * yet, get it now.
 330 */
 331 if (ump->um_cp->ace == 0)
 332 error = g_access(ump->um_cp, 0, 1, 1);
 333 else
 334 error = g_access(ump->um_cp, 0, 1, 0);
 335 g_topology_unlock();
 336 PICKUP_GIANT();
 337 if (error)
 338 return (error);
 339 if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 340 return (error);
 341 fs->fs_ronly = 0;
 342 MNT_ILOCK(mp);
 343 mp->mnt_flag &= ~MNT_RDONLY;
 344 MNT_IUNLOCK(mp);
 345 fs->fs_mtime = time_second;
 346 /* check to see if we need to start softdep */
 347 if ((fs->fs_flags & FS_DOSOFTDEP) &&
 348 (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
 349 vn_finished_write(mp);
 350 return (error);
 351 }
 352 fs->fs_clean = 0;
 353 if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 354 vn_finished_write(mp);
 355 return (error);
 356 }
 357 if (fs->fs_snapinum[0] != 0)
 358 ffs_snapshot_mount(mp);
 359 vn_finished_write(mp);
 360 }
 361 /*
 362 * Soft updates is incompatible with "async",
 363 * so if we are doing softupdates stop the user
 364 * from setting the async flag in an update.
 365 * Softdep_mount() clears it in an initial mount
 366 * or ro->rw remount.
 367 */
 368 if (mp->mnt_flag & MNT_SOFTDEP) {
 369 /* XXX: Reset too late ? */
 370 MNT_ILOCK(mp);
 371 mp->mnt_flag &= ~MNT_ASYNC;
 372 MNT_IUNLOCK(mp);
 373 }
 374 /*
 375 * Keep MNT_ACLS flag if it is stored in superblock.
 376 */
 377 if ((fs->fs_flags & FS_ACLS) != 0) {
 378 /* XXX: Set too late ? */
 379 MNT_ILOCK(mp);
 380 mp->mnt_flag |= MNT_ACLS;
 381 MNT_IUNLOCK(mp);
 382 }
 383
 384 if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
 385 /* XXX: Set too late ? */
 386 MNT_ILOCK(mp);
 387 mp->mnt_flag |= MNT_NFS4ACLS;
 388 MNT_IUNLOCK(mp);
 389 }
 390
 391 /*
 392 * If this is a snapshot request, take the snapshot.
 393 */
 394 if (mp->mnt_flag & MNT_SNAPSHOT)
 395 return (ffs_snapshot(mp, fspec));
 396 }
 397
 398 /*
 399 * Not an update, or updating the name: look up the name
 400 * and verify that it refers to a sensible disk device.
 401 */
 402 NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
 403 if ((error = namei(&ndp)) != 0)
 404 return (error);
 405 NDFREE(&ndp, NDF_ONLY_PNBUF);
 406 devvp = ndp.ni_vp;
 407 if (!vn_isdisk(devvp, &error)) {
 408 vput(devvp);
 409 return (error);
 410 }
 411
 412 /*
 413 * If mount by non-root, then verify that user has necessary
 414 * permissions on the device.
 415 */
 416 accmode = VREAD;
 417 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 418 accmode |= VWRITE;
 419 error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
 420 if (error)
 421 error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 422 if (error) {
 423 vput(devvp);
 424 return (error);
 425 }
 426
 427 if (mp->mnt_flag & MNT_UPDATE) {
 428 /*
 429 * Update only
 430 *
 431 * If it's not the same vnode, or at least the same device
 432 * then it's not correct.
 433 */
 434
 435 if (devvp->v_rdev != ump->um_devvp->v_rdev)
 436 error = EINVAL; /* needs translation */
 437 vput(devvp);
 438 if (error)
 439 return (error);
 440 } else {
 441 /*
 442 * New mount
 443 *
 444 * We need the name for the mount point (also used for
 445 * "last mounted on") copied in. If an error occurs,
 446 * the mount point is discarded by the upper level code.
 447 * Note that vfs_mount() populates f_mntonname for us.
 448 */
 449 if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
 450 vrele(devvp);
 451 return (error);
 452 }
 453 }
 454 vfs_mountedfrom(mp, fspec);
 455 return (0);
 456}
457
458/*
459 * Compatibility with old mount system call.
460 */
461
/*
 * NOTE(review): this span is a diff interleave of two revisions of
 * ffs_cmount(), the compatibility shim for the old mount(2) ABI.  The
 * r213664 change (see the __FBSDID lines above) adds a local
 * struct export_args and converts the old-style args.export through
 * vfs_oexport_conv() before passing it to kernel_mount(), instead of
 * handing args.export through unconverted.  Deleted and added lines
 * appear side by side below, distinguished only by their embedded
 * source line numbers.
 */
 462static int
 463ffs_cmount(struct mntarg *ma, void *data, int flags)
 464{
 465 struct ufs_args args;
 466 struct export_args exp;
 466 int error;
 467
 468 if (data == NULL)
 469 return (EINVAL);
 470 error = copyin(data, &args, sizeof args);
 471 if (error)
 472 return (error);
 467 int error;
 468
 469 if (data == NULL)
 470 return (EINVAL);
 471 error = copyin(data, &args, sizeof args);
 472 if (error)
 473 return (error);
 474 vfs_oexport_conv(&args.export, &exp);
 473
 474 ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
 475
 476 ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
 475 ma = mount_arg(ma, "export", &args.export, sizeof args.export);
 477 ma = mount_arg(ma, "export", &exp, sizeof(exp));
 476 error = kernel_mount(ma, flags);
 477
 478 return (error);
 479}
480
481/*
482 * Reload all incore data for a filesystem (used after running fsck on
483 * the root filesystem and finding things to fix). The filesystem must
484 * be mounted read-only.
485 *
486 * Things to do to update the mount:
487 * 1) invalidate all cached meta-data.
488 * 2) re-read superblock from disk.
489 * 3) re-read summary information from disk.
490 * 4) invalidate all inactive vnodes.
491 * 5) invalidate all cached file data.
492 * 6) re-read inode data for all active vnodes.
493 */
/*
 * See the step-by-step description in the block comment immediately
 * above: invalidate cached metadata, re-read the superblock and summary
 * information, then refresh every active vnode's inode from disk.
 * Requires the filesystem to be mounted read-only; returns 0 or errno.
 */
 494static int
 495ffs_reload(struct mount *mp, struct thread *td)
 496{
 497 struct vnode *vp, *mvp, *devvp;
 498 struct inode *ip;
 499 void *space;
 500 struct buf *bp;
 501 struct fs *fs, *newfs;
 502 struct ufsmount *ump;
 503 ufs2_daddr_t sblockloc;
 504 int i, blks, size, error;
 505 int32_t *lp;
 506
 507 if ((mp->mnt_flag & MNT_RDONLY) == 0)
 508 return (EINVAL);
 509 ump = VFSTOUFS(mp);
 510 /*
 511 * Step 1: invalidate all cached meta-data.
 512 */
 513 devvp = VFSTOUFS(mp)->um_devvp;
 514 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 515 if (vinvalbuf(devvp, 0, 0, 0) != 0)
 516 panic("ffs_reload: dirty1");
 517 VOP_UNLOCK(devvp, 0);
 518
 519 /*
 520 * Step 2: re-read superblock from disk.
 521 */
 522 fs = VFSTOUFS(mp)->um_fs;
 523 if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
 524 NOCRED, &bp)) != 0)
 525 return (error);
 526 newfs = (struct fs *)bp->b_data;
/* Sanity-check magic and block size before trusting the new superblock. */
 527 if ((newfs->fs_magic != FS_UFS1_MAGIC &&
 528 newfs->fs_magic != FS_UFS2_MAGIC) ||
 529 newfs->fs_bsize > MAXBSIZE ||
 530 newfs->fs_bsize < sizeof(struct fs)) {
 531 brelse(bp);
 532 return (EIO); /* XXX needs translation */
 533 }
 534 /*
 535 * Copy pointer fields back into superblock before copying in XXX
 536 * new superblock. These should really be in the ufsmount. XXX
 537 * Note that important parameters (eg fs_ncg) are unchanged.
 538 */
 539 newfs->fs_csp = fs->fs_csp;
 540 newfs->fs_maxcluster = fs->fs_maxcluster;
 541 newfs->fs_contigdirs = fs->fs_contigdirs;
 542 newfs->fs_active = fs->fs_active;
 543 /* The file system is still read-only. */
 544 newfs->fs_ronly = 1;
 545 sblockloc = fs->fs_sblockloc;
 546 bcopy(newfs, fs, (u_int)fs->fs_sbsize);
 547 brelse(bp);
 548 mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 549 ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
 550 UFS_LOCK(ump);
 551 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 552 printf("%s: reload pending error: blocks %jd files %d\n",
 553 fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 554 fs->fs_pendinginodes);
 555 fs->fs_pendingblocks = 0;
 556 fs->fs_pendinginodes = 0;
 557 }
 558 UFS_UNLOCK(ump);
 559
 560 /*
 561 * Step 3: re-read summary information from disk.
 562 */
 563 blks = howmany(fs->fs_cssize, fs->fs_fsize);
 564 space = fs->fs_csp;
 565 for (i = 0; i < blks; i += fs->fs_frag) {
 566 size = fs->fs_bsize;
 567 if (i + fs->fs_frag > blks)
 568 size = (blks - i) * fs->fs_fsize;
 569 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 570 NOCRED, &bp);
 571 if (error)
 572 return (error);
 573 bcopy(bp->b_data, space, (u_int)size);
 574 space = (char *)space + size;
 575 brelse(bp);
 576 }
 577 /*
 578 * We no longer know anything about clusters per cylinder group.
 579 */
 580 if (fs->fs_contigsumsize > 0) {
 581 lp = fs->fs_maxcluster;
 582 for (i = 0; i < fs->fs_ncg; i++)
 583 *lp++ = fs->fs_contigsumsize;
 584 }
 585
/*
 * Steps 4 and 5: walk every vnode on the mount; restart the scan from
 * "loop" whenever vget() fails and the iterator must be aborted.
 */
 586loop:
 587 MNT_ILOCK(mp);
 588 MNT_VNODE_FOREACH(vp, mp, mvp) {
 589 VI_LOCK(vp);
 590 if (vp->v_iflag & VI_DOOMED) {
 591 VI_UNLOCK(vp);
 592 continue;
 593 }
 594 MNT_IUNLOCK(mp);
 595 /*
 596 * Step 4: invalidate all cached file data.
 597 */
 598 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
 599 MNT_VNODE_FOREACH_ABORT(mp, mvp);
 600 goto loop;
 601 }
 602 if (vinvalbuf(vp, 0, 0, 0))
 603 panic("ffs_reload: dirty2");
 604 /*
 605 * Step 5: re-read inode data for all active vnodes.
 606 */
 607 ip = VTOI(vp);
 608 error =
 609 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 610 (int)fs->fs_bsize, NOCRED, &bp);
 611 if (error) {
 612 VOP_UNLOCK(vp, 0);
 613 vrele(vp);
 614 MNT_VNODE_FOREACH_ABORT(mp, mvp);
 615 return (error);
 616 }
 617 ffs_load_inode(bp, ip, fs, ip->i_number);
 618 ip->i_effnlink = ip->i_nlink;
 619 brelse(bp);
 620 VOP_UNLOCK(vp, 0);
 621 vrele(vp);
 622 MNT_ILOCK(mp);
 623 }
 624 MNT_IUNLOCK(mp);
 625 return (0);
 626}
627
 628/*
 629 * Possible superblock locations ordered from most to least likely.
 630 */
/* SBLOCKSEARCH is terminated by -1; scanned in order by ffs_mountfs(). */
 631static int sblock_try[] = SBLOCKSEARCH;
632
633/*
634 * Common code for mount and mountroot
635 */
636static int
637ffs_mountfs(devvp, mp, td)
638 struct vnode *devvp;
639 struct mount *mp;
640 struct thread *td;
641{
642 struct ufsmount *ump;
643 struct buf *bp;
644 struct fs *fs;
645 struct cdev *dev;
646 void *space;
647 ufs2_daddr_t sblockloc;
648 int error, i, blks, size, ronly;
649 int32_t *lp;
650 struct ucred *cred;
651 struct g_consumer *cp;
652 struct mount *nmp;
653
654 bp = NULL;
655 ump = NULL;
656 cred = td ? td->td_ucred : NOCRED;
657 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
658
659 dev = devvp->v_rdev;
660 dev_ref(dev);
661 DROP_GIANT();
662 g_topology_lock();
663 error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
664
665 /*
666 * If we are a root mount, drop the E flag so fsck can do its magic.
667 * We will pick it up again when we remount R/W.
668 */
669 if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
670 error = g_access(cp, 0, 0, -1);
671 g_topology_unlock();
672 PICKUP_GIANT();
673 VOP_UNLOCK(devvp, 0);
674 if (error)
675 goto out;
676 if (devvp->v_rdev->si_iosize_max != 0)
677 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
678 if (mp->mnt_iosize_max > MAXPHYS)
679 mp->mnt_iosize_max = MAXPHYS;
680
681 devvp->v_bufobj.bo_ops = &ffs_ops;
682
683 fs = NULL;
684 sblockloc = 0;
685 /*
686 * Try reading the superblock in each of its possible locations.
687 */
688 for (i = 0; sblock_try[i] != -1; i++) {
689 if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
690 error = EINVAL;
691 vfs_mount_error(mp,
692 "Invalid sectorsize %d for superblock size %d",
693 cp->provider->sectorsize, SBLOCKSIZE);
694 goto out;
695 }
696 if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
697 cred, &bp)) != 0)
698 goto out;
699 fs = (struct fs *)bp->b_data;
700 sblockloc = sblock_try[i];
701 if ((fs->fs_magic == FS_UFS1_MAGIC ||
702 (fs->fs_magic == FS_UFS2_MAGIC &&
703 (fs->fs_sblockloc == sblockloc ||
704 (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
705 fs->fs_bsize <= MAXBSIZE &&
706 fs->fs_bsize >= sizeof(struct fs))
707 break;
708 brelse(bp);
709 bp = NULL;
710 }
711 if (sblock_try[i] == -1) {
712 error = EINVAL; /* XXX needs translation */
713 goto out;
714 }
715 fs->fs_fmod = 0;
716 fs->fs_flags &= ~FS_INDEXDIRS; /* no support for directory indicies */
717 fs->fs_flags &= ~FS_UNCLEAN;
718 if (fs->fs_clean == 0) {
719 fs->fs_flags |= FS_UNCLEAN;
720 if (ronly || (mp->mnt_flag & MNT_FORCE) ||
721 ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
722 (fs->fs_flags & FS_DOSOFTDEP))) {
723 printf("WARNING: %s was not properly dismounted\n",
724 fs->fs_fsmnt);
725 } else {
726 printf(
727"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
728 fs->fs_fsmnt);
729 if (fs->fs_flags & FS_SUJ)
730 printf(
731"WARNING: Forced mount will invalidate journal contents\n");
732 error = EPERM;
733 goto out;
734 }
735 if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
736 (mp->mnt_flag & MNT_FORCE)) {
737 printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
738 (intmax_t)fs->fs_pendingblocks,
739 fs->fs_pendinginodes);
740 fs->fs_pendingblocks = 0;
741 fs->fs_pendinginodes = 0;
742 }
743 }
744 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
745 printf("%s: mount pending error: blocks %jd files %d\n",
746 fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
747 fs->fs_pendinginodes);
748 fs->fs_pendingblocks = 0;
749 fs->fs_pendinginodes = 0;
750 }
751 if ((fs->fs_flags & FS_GJOURNAL) != 0) {
752#ifdef UFS_GJOURNAL
753 /*
754 * Get journal provider name.
755 */
756 size = 1024;
757 mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
758 if (g_io_getattr("GJOURNAL::provider", cp, &size,
759 mp->mnt_gjprovider) == 0) {
760 mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
761 M_UFSMNT, M_WAITOK);
762 MNT_ILOCK(mp);
763 mp->mnt_flag |= MNT_GJOURNAL;
764 MNT_IUNLOCK(mp);
765 } else {
766 printf(
767"WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
768 mp->mnt_stat.f_mntonname);
769 free(mp->mnt_gjprovider, M_UFSMNT);
770 mp->mnt_gjprovider = NULL;
771 }
772#else
773 printf(
774"WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
775 mp->mnt_stat.f_mntonname);
776#endif
777 } else {
778 mp->mnt_gjprovider = NULL;
779 }
780 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
781 ump->um_cp = cp;
782 ump->um_bo = &devvp->v_bufobj;
783 ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
784 if (fs->fs_magic == FS_UFS1_MAGIC) {
785 ump->um_fstype = UFS1;
786 ump->um_balloc = ffs_balloc_ufs1;
787 } else {
788 ump->um_fstype = UFS2;
789 ump->um_balloc = ffs_balloc_ufs2;
790 }
791 ump->um_blkatoff = ffs_blkatoff;
792 ump->um_truncate = ffs_truncate;
793 ump->um_update = ffs_update;
794 ump->um_valloc = ffs_valloc;
795 ump->um_vfree = ffs_vfree;
796 ump->um_ifree = ffs_ifree;
797 ump->um_rdonly = ffs_rdonly;
798 mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
799 bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
800 if (fs->fs_sbsize < SBLOCKSIZE)
801 bp->b_flags |= B_INVAL | B_NOCACHE;
802 brelse(bp);
803 bp = NULL;
804 fs = ump->um_fs;
805 ffs_oldfscompat_read(fs, ump, sblockloc);
806 fs->fs_ronly = ronly;
807 size = fs->fs_cssize;
808 blks = howmany(size, fs->fs_fsize);
809 if (fs->fs_contigsumsize > 0)
810 size += fs->fs_ncg * sizeof(int32_t);
811 size += fs->fs_ncg * sizeof(u_int8_t);
812 space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
813 fs->fs_csp = space;
814 for (i = 0; i < blks; i += fs->fs_frag) {
815 size = fs->fs_bsize;
816 if (i + fs->fs_frag > blks)
817 size = (blks - i) * fs->fs_fsize;
818 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
819 cred, &bp)) != 0) {
820 free(fs->fs_csp, M_UFSMNT);
821 goto out;
822 }
823 bcopy(bp->b_data, space, (u_int)size);
824 space = (char *)space + size;
825 brelse(bp);
826 bp = NULL;
827 }
828 if (fs->fs_contigsumsize > 0) {
829 fs->fs_maxcluster = lp = space;
830 for (i = 0; i < fs->fs_ncg; i++)
831 *lp++ = fs->fs_contigsumsize;
832 space = lp;
833 }
834 size = fs->fs_ncg * sizeof(u_int8_t);
835 fs->fs_contigdirs = (u_int8_t *)space;
836 bzero(fs->fs_contigdirs, size);
837 fs->fs_active = NULL;
838 mp->mnt_data = ump;
839 mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
840 mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
841 nmp = NULL;
842 if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
843 (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
844 if (nmp)
845 vfs_rel(nmp);
846 vfs_getnewfsid(mp);
847 }
848 mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
849 MNT_ILOCK(mp);
850 mp->mnt_flag |= MNT_LOCAL;
851 MNT_IUNLOCK(mp);
852 if ((fs->fs_flags & FS_MULTILABEL) != 0) {
853#ifdef MAC
854 MNT_ILOCK(mp);
855 mp->mnt_flag |= MNT_MULTILABEL;
856 MNT_IUNLOCK(mp);
857#else
858 printf(
859"WARNING: %s: multilabel flag on fs but no MAC support\n",
860 mp->mnt_stat.f_mntonname);
861#endif
862 }
863 if ((fs->fs_flags & FS_ACLS) != 0) {
864#ifdef UFS_ACL
865 MNT_ILOCK(mp);
866
867 if (mp->mnt_flag & MNT_NFS4ACLS)
868 printf("WARNING: ACLs flag on fs conflicts with "
869 "\"nfsv4acls\" mount option; option ignored\n");
870 mp->mnt_flag &= ~MNT_NFS4ACLS;
871 mp->mnt_flag |= MNT_ACLS;
872
873 MNT_IUNLOCK(mp);
874#else
875 printf("WARNING: %s: ACLs flag on fs but no ACLs support\n",
876 mp->mnt_stat.f_mntonname);
877#endif
878 }
879 if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
880#ifdef UFS_ACL
881 MNT_ILOCK(mp);
882
883 if (mp->mnt_flag & MNT_ACLS)
884 printf("WARNING: NFSv4 ACLs flag on fs conflicts with "
885 "\"acls\" mount option; option ignored\n");
886 mp->mnt_flag &= ~MNT_ACLS;
887 mp->mnt_flag |= MNT_NFS4ACLS;
888
889 MNT_IUNLOCK(mp);
890#else
891 printf(
892"WARNING: %s: NFSv4 ACLs flag on fs but no ACLs support\n",
893 mp->mnt_stat.f_mntonname);
894#endif
895 }
896
897 ump->um_mountp = mp;
898 ump->um_dev = dev;
899 ump->um_devvp = devvp;
900 ump->um_nindir = fs->fs_nindir;
901 ump->um_bptrtodb = fs->fs_fsbtodb;
902 ump->um_seqinc = fs->fs_frag;
903 for (i = 0; i < MAXQUOTAS; i++)
904 ump->um_quotas[i] = NULLVP;
905#ifdef UFS_EXTATTR
906 ufs_extattr_uepm_init(&ump->um_extattr);
907#endif
908 /*
909 * Set FS local "last mounted on" information (NULL pad)
910 */
911 bzero(fs->fs_fsmnt, MAXMNTLEN);
912 strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
913 mp->mnt_stat.f_iosize = fs->fs_bsize;
914
915 if( mp->mnt_flag & MNT_ROOTFS) {
916 /*
917 * Root mount; update timestamp in mount structure.
918 * this will be used by the common root mount code
919 * to update the system clock.
920 */
921 mp->mnt_time = fs->fs_time;
922 }
923
924 if (ronly == 0) {
925 fs->fs_mtime = time_second;
926 if ((fs->fs_flags & FS_DOSOFTDEP) &&
927 (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
928 free(fs->fs_csp, M_UFSMNT);
929 goto out;
930 }
931 if (fs->fs_snapinum[0] != 0)
932 ffs_snapshot_mount(mp);
933 fs->fs_fmod = 1;
934 fs->fs_clean = 0;
935 (void) ffs_sbupdate(ump, MNT_WAIT, 0);
936 }
937 /*
938 * Initialize filesystem stat information in mount struct.
939 */
940 MNT_ILOCK(mp);
941 mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
942 MNTK_EXTENDED_SHARED;
943 MNT_IUNLOCK(mp);
944#ifdef UFS_EXTATTR
945#ifdef UFS_EXTATTR_AUTOSTART
946 /*
947 *
948 * Auto-starting does the following:
949 * - check for /.attribute in the fs, and extattr_start if so
950 * - for each file in .attribute, enable that file with
951 * an attribute of the same name.
952 * Not clear how to report errors -- probably eat them.
953 * This would all happen while the filesystem was busy/not
954 * available, so would effectively be "atomic".
955 */
956 (void) ufs_extattr_autostart(mp, td);
957#endif /* !UFS_EXTATTR_AUTOSTART */
958#endif /* !UFS_EXTATTR */
959 return (0);
960out:
961 if (bp)
962 brelse(bp);
963 if (cp != NULL) {
964 DROP_GIANT();
965 g_topology_lock();
966 g_vfs_close(cp);
967 g_topology_unlock();
968 PICKUP_GIANT();
969 }
970 if (ump) {
971 mtx_destroy(UFS_MTX(ump));
972 if (mp->mnt_gjprovider != NULL) {
973 free(mp->mnt_gjprovider, M_UFSMNT);
974 mp->mnt_gjprovider = NULL;
975 }
976 free(ump->um_fs, M_UFSMNT);
977 free(ump, M_UFSMNT);
978 mp->mnt_data = NULL;
979 }
980 dev_rel(dev);
981 return (error);
982}
983
#include <sys/sysctl.h>
/*
 * debug.bigcgs: when non-zero, ffs_oldfscompat_read() temporarily raises
 * fs_cgsize to fs_bsize; ffs_oldfscompat_write() restores the saved value.
 */
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
987
988/*
989 * Sanity checks for loading old filesystem superblocks.
990 * See ffs_oldfscompat_write below for unwound actions.
991 *
992 * XXX - Parts get retired eventually.
993 * Unfortunately new bits get added.
994 */
995static void
996ffs_oldfscompat_read(fs, ump, sblockloc)
997 struct fs *fs;
998 struct ufsmount *ump;
999 ufs2_daddr_t sblockloc;
1000{
1001 off_t maxfilesize;
1002
1003 /*
1004 * If not yet done, update fs_flags location and value of fs_sblockloc.
1005 */
1006 if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
1007 fs->fs_flags = fs->fs_old_flags;
1008 fs->fs_old_flags |= FS_FLAGS_UPDATED;
1009 fs->fs_sblockloc = sblockloc;
1010 }
1011 /*
1012 * If not yet done, update UFS1 superblock with new wider fields.
1013 */
1014 if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
1015 fs->fs_maxbsize = fs->fs_bsize;
1016 fs->fs_time = fs->fs_old_time;
1017 fs->fs_size = fs->fs_old_size;
1018 fs->fs_dsize = fs->fs_old_dsize;
1019 fs->fs_csaddr = fs->fs_old_csaddr;
1020 fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1021 fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1022 fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1023 fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1024 }
1025 if (fs->fs_magic == FS_UFS1_MAGIC &&
1026 fs->fs_old_inodefmt < FS_44INODEFMT) {
1027 fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
1028 fs->fs_qbmask = ~fs->fs_bmask;
1029 fs->fs_qfmask = ~fs->fs_fmask;
1030 }
1031 if (fs->fs_magic == FS_UFS1_MAGIC) {
1032 ump->um_savedmaxfilesize = fs->fs_maxfilesize;
1033 maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
1034 if (fs->fs_maxfilesize > maxfilesize)
1035 fs->fs_maxfilesize = maxfilesize;
1036 }
1037 /* Compatibility for old filesystems */
1038 if (fs->fs_avgfilesize <= 0)
1039 fs->fs_avgfilesize = AVFILESIZ;
1040 if (fs->fs_avgfpdir <= 0)
1041 fs->fs_avgfpdir = AFPDIR;
1042 if (bigcgs) {
1043 fs->fs_save_cgsize = fs->fs_cgsize;
1044 fs->fs_cgsize = fs->fs_bsize;
1045 }
1046}
1047
1048/*
1049 * Unwinding superblock updates for old filesystems.
1050 * See ffs_oldfscompat_read above for details.
1051 *
1052 * XXX - Parts get retired eventually.
1053 * Unfortunately new bits get added.
1054 */
1055void
1056ffs_oldfscompat_write(fs, ump)
1057 struct fs *fs;
1058 struct ufsmount *ump;
1059{
1060
1061 /*
1062 * Copy back UFS2 updated fields that UFS1 inspects.
1063 */
1064 if (fs->fs_magic == FS_UFS1_MAGIC) {
1065 fs->fs_old_time = fs->fs_time;
1066 fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1067 fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1068 fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1069 fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1070 fs->fs_maxfilesize = ump->um_savedmaxfilesize;
1071 }
1072 if (bigcgs) {
1073 fs->fs_cgsize = fs->fs_save_cgsize;
1074 fs->fs_save_cgsize = 0;
1075 }
1076}
1077
1078/*
1079 * unmount system call
1080 */
1081static int
1082ffs_unmount(mp, mntflags)
1083 struct mount *mp;
1084 int mntflags;
1085{
1086 struct thread *td;
1087 struct ufsmount *ump = VFSTOUFS(mp);
1088 struct fs *fs;
1089 int error, flags, susp;
1090#ifdef UFS_EXTATTR
1091 int e_restart;
1092#endif
1093
1094 flags = 0;
1095 td = curthread;
1096 fs = ump->um_fs;
1097 if (mntflags & MNT_FORCE) {
1098 flags |= FORCECLOSE;
1099 susp = fs->fs_ronly != 0;
1100 } else
1101 susp = 0;
1102#ifdef UFS_EXTATTR
1103 if ((error = ufs_extattr_stop(mp, td))) {
1104 if (error != EOPNOTSUPP)
1105 printf("ffs_unmount: ufs_extattr_stop returned %d\n",
1106 error);
1107 e_restart = 0;
1108 } else {
1109 ufs_extattr_uepm_destroy(&ump->um_extattr);
1110 e_restart = 1;
1111 }
1112#endif
1113 if (susp) {
1114 /*
1115 * dounmount already called vn_start_write().
1116 */
1117 for (;;) {
1118 vn_finished_write(mp);
1119 if ((error = vfs_write_suspend(mp)) != 0)
1120 return (error);
1121 MNT_ILOCK(mp);
1122 if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
1123 mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
1124 MNTK_SUSPEND2);
1125 wakeup(&mp->mnt_flag);
1126 MNT_IUNLOCK(mp);
1127 td->td_pflags |= TDP_IGNSUSP;
1128 break;
1129 }
1130 MNT_IUNLOCK(mp);
1131 vn_start_write(NULL, &mp, V_WAIT);
1132 }
1133 }
1134 if (mp->mnt_flag & MNT_SOFTDEP)
1135 error = softdep_flushfiles(mp, flags, td);
1136 else
1137 error = ffs_flushfiles(mp, flags, td);
1138 if (error != 0 && error != ENXIO)
1139 goto fail;
1140
1141 UFS_LOCK(ump);
1142 if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1143 printf("%s: unmount pending error: blocks %jd files %d\n",
1144 fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1145 fs->fs_pendinginodes);
1146 fs->fs_pendingblocks = 0;
1147 fs->fs_pendinginodes = 0;
1148 }
1149 UFS_UNLOCK(ump);
1150 softdep_unmount(mp);
1151 if (fs->fs_ronly == 0) {
1152 fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1153 error = ffs_sbupdate(ump, MNT_WAIT, 0);
1154 if (error && error != ENXIO) {
1155 fs->fs_clean = 0;
1156 goto fail;
1157 }
1158 }
1159 if (susp) {
1160 vfs_write_resume(mp);
1161 vn_start_write(NULL, &mp, V_WAIT);
1162 }
1163 DROP_GIANT();
1164 g_topology_lock();
1165 g_vfs_close(ump->um_cp);
1166 g_topology_unlock();
1167 PICKUP_GIANT();
1168 vrele(ump->um_devvp);
1169 dev_rel(ump->um_dev);
1170 mtx_destroy(UFS_MTX(ump));
1171 if (mp->mnt_gjprovider != NULL) {
1172 free(mp->mnt_gjprovider, M_UFSMNT);
1173 mp->mnt_gjprovider = NULL;
1174 }
1175 free(fs->fs_csp, M_UFSMNT);
1176 free(fs, M_UFSMNT);
1177 free(ump, M_UFSMNT);
1178 mp->mnt_data = NULL;
1179 MNT_ILOCK(mp);
1180 mp->mnt_flag &= ~MNT_LOCAL;
1181 MNT_IUNLOCK(mp);
1182 return (error);
1183
1184fail:
1185 if (susp) {
1186 vfs_write_resume(mp);
1187 vn_start_write(NULL, &mp, V_WAIT);
1188 }
1189#ifdef UFS_EXTATTR
1190 if (e_restart) {
1191 ufs_extattr_uepm_init(&ump->um_extattr);
1192#ifdef UFS_EXTATTR_AUTOSTART
1193 (void) ufs_extattr_autostart(mp, td);
1194#endif
1195 }
1196#endif
1197
1198 return (error);
1199}
1200
1201/*
1202 * Flush out all the files in a filesystem.
1203 */
1204int
1205ffs_flushfiles(mp, flags, td)
1206 struct mount *mp;
1207 int flags;
1208 struct thread *td;
1209{
1210 struct ufsmount *ump;
1211 int error;
1212
1213 ump = VFSTOUFS(mp);
1214#ifdef QUOTA
1215 if (mp->mnt_flag & MNT_QUOTA) {
1216 int i;
1217 error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1218 if (error)
1219 return (error);
1220 for (i = 0; i < MAXQUOTAS; i++) {
1221 quotaoff(td, mp, i);
1222 }
1223 /*
1224 * Here we fall through to vflush again to ensure
1225 * that we have gotten rid of all the system vnodes.
1226 */
1227 }
1228#endif
1229 ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1230 if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1231 if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1232 return (error);
1233 ffs_snapshot_unmount(mp);
1234 flags |= FORCECLOSE;
1235 /*
1236 * Here we fall through to vflush again to ensure
1237 * that we have gotten rid of all the system vnodes.
1238 */
1239 }
1240 /*
1241 * Flush all the files.
1242 */
1243 if ((error = vflush(mp, 0, flags, td)) != 0)
1244 return (error);
1245 /*
1246 * Flush filesystem metadata.
1247 */
1248 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1249 error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1250 VOP_UNLOCK(ump->um_devvp, 0);
1251 return (error);
1252}
1253
1254/*
1255 * Get filesystem statistics.
1256 */
1257static int
1258ffs_statfs(mp, sbp)
1259 struct mount *mp;
1260 struct statfs *sbp;
1261{
1262 struct ufsmount *ump;
1263 struct fs *fs;
1264
1265 ump = VFSTOUFS(mp);
1266 fs = ump->um_fs;
1267 if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1268 panic("ffs_statfs");
1269 sbp->f_version = STATFS_VERSION;
1270 sbp->f_bsize = fs->fs_fsize;
1271 sbp->f_iosize = fs->fs_bsize;
1272 sbp->f_blocks = fs->fs_dsize;
1273 UFS_LOCK(ump);
1274 sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1275 fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1276 sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1277 dbtofsb(fs, fs->fs_pendingblocks);
1278 sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
1279 sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1280 UFS_UNLOCK(ump);
1281 sbp->f_namemax = NAME_MAX;
1282 return (0);
1283}
1284
1285/*
1286 * Go through the disk queues to initiate sandbagged IO;
1287 * go through the inodes to write those that have been modified;
1288 * initiate the writing of the super block if it has been modified.
1289 *
1290 * Note: we are always called with the filesystem marked `MPBUSY'.
1291 */
1292static int
1293ffs_sync(mp, waitfor)
1294 struct mount *mp;
1295 int waitfor;
1296{
1297 struct vnode *mvp, *vp, *devvp;
1298 struct thread *td;
1299 struct inode *ip;
1300 struct ufsmount *ump = VFSTOUFS(mp);
1301 struct fs *fs;
1302 int error, count, wait, lockreq, allerror = 0;
1303 int suspend;
1304 int suspended;
1305 int secondary_writes;
1306 int secondary_accwrites;
1307 int softdep_deps;
1308 int softdep_accdeps;
1309 struct bufobj *bo;
1310
1311 td = curthread;
1312 fs = ump->um_fs;
1313 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
1314 printf("fs = %s\n", fs->fs_fsmnt);
1315 panic("ffs_sync: rofs mod");
1316 }
1317 /*
1318 * Write back each (modified) inode.
1319 */
1320 wait = 0;
1321 suspend = 0;
1322 suspended = 0;
1323 lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1324 if (waitfor == MNT_SUSPEND) {
1325 suspend = 1;
1326 waitfor = MNT_WAIT;
1327 }
1328 if (waitfor == MNT_WAIT) {
1329 wait = 1;
1330 lockreq = LK_EXCLUSIVE;
1331 }
1332 lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
1333 MNT_ILOCK(mp);
1334loop:
1335 /* Grab snapshot of secondary write counts */
1336 secondary_writes = mp->mnt_secondary_writes;
1337 secondary_accwrites = mp->mnt_secondary_accwrites;
1338
1339 /* Grab snapshot of softdep dependency counts */
1340 MNT_IUNLOCK(mp);
1341 softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
1342 MNT_ILOCK(mp);
1343
1344 MNT_VNODE_FOREACH(vp, mp, mvp) {
1345 /*
1346 * Depend on the mntvnode_slock to keep things stable enough
1347 * for a quick test. Since there might be hundreds of
1348 * thousands of vnodes, we cannot afford even a subroutine
1349 * call unless there's a good chance that we have work to do.
1350 */
1351 VI_LOCK(vp);
1352 if (vp->v_iflag & VI_DOOMED) {
1353 VI_UNLOCK(vp);
1354 continue;
1355 }
1356 ip = VTOI(vp);
1357 if (vp->v_type == VNON || ((ip->i_flag &
1358 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1359 vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1360 VI_UNLOCK(vp);
1361 continue;
1362 }
1363 MNT_IUNLOCK(mp);
1364 if ((error = vget(vp, lockreq, td)) != 0) {
1365 MNT_ILOCK(mp);
1366 if (error == ENOENT || error == ENOLCK) {
1367 MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1368 goto loop;
1369 }
1370 continue;
1371 }
1372 if ((error = ffs_syncvnode(vp, waitfor)) != 0)
1373 allerror = error;
1374 vput(vp);
1375 MNT_ILOCK(mp);
1376 }
1377 MNT_IUNLOCK(mp);
1378 /*
1379 * Force stale filesystem control information to be flushed.
1380 */
1381 if (waitfor == MNT_WAIT) {
1382 if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1383 allerror = error;
1384 /* Flushed work items may create new vnodes to clean */
1385 if (allerror == 0 && count) {
1386 MNT_ILOCK(mp);
1387 goto loop;
1388 }
1389 }
1390#ifdef QUOTA
1391 qsync(mp);
1392#endif
1393 devvp = ump->um_devvp;
1394 bo = &devvp->v_bufobj;
1395 BO_LOCK(bo);
1396 if (waitfor != MNT_LAZY &&
1397 (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1398 BO_UNLOCK(bo);
1399 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1400 if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
1401 allerror = error;
1402 VOP_UNLOCK(devvp, 0);
1403 if (allerror == 0 && waitfor == MNT_WAIT) {
1404 MNT_ILOCK(mp);
1405 goto loop;
1406 }
1407 } else if (suspend != 0) {
1408 if (softdep_check_suspend(mp,
1409 devvp,
1410 softdep_deps,
1411 softdep_accdeps,
1412 secondary_writes,
1413 secondary_accwrites) != 0)
1414 goto loop; /* More work needed */
1415 mtx_assert(MNT_MTX(mp), MA_OWNED);
1416 mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
1417 MNT_IUNLOCK(mp);
1418 suspended = 1;
1419 } else
1420 BO_UNLOCK(bo);
1421 /*
1422 * Write back modified superblock.
1423 */
1424 if (fs->fs_fmod != 0 &&
1425 (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
1426 allerror = error;
1427 return (allerror);
1428}
1429
1430int
1431ffs_vget(mp, ino, flags, vpp)
1432 struct mount *mp;
1433 ino_t ino;
1434 int flags;
1435 struct vnode **vpp;
1436{
1437 return (ffs_vgetf(mp, ino, flags, vpp, 0));
1438}
1439
/*
 * Look up (or create) the vnode for inode number "ino" on mount "mp",
 * returning it locked in *vpp.  ffs_flags may carry FFSV_FORCEINSMQ
 * to force insertion onto the mount queue.  Creation is deliberately
 * not serialized: racing creators are resolved by vfs_hash_insert().
 */
int
ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
	int ffs_flags;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct cdev *dev;
	int error;

	/* Fast path: the vnode may already be in the hash. */
	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation.  This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_fs;

	/*
	 * If this malloc() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
	else
		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	/*
	 * FFS supports recursive locking.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	VN_LOCK_AREC(vp);
	vp->v_data = ip;
	vp->v_bufobj.bo_bsize = fs->fs_bsize;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs;
	ip->i_dev = dev;
	ip->i_number = ino;
	ip->i_ea_refs = 0;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif

	if (ffs_flags & FFSV_FORCEINSMQ)
		vp->v_vflag |= VV_FORCEINSMQ;
	error = insmntque(vp, mp);
	if (error != 0) {
		uma_zfree(uma_inode, ip);
		*vpp = NULL;
		return (error);
	}
	vp->v_vflag &= ~VV_FORCEINSMQ;
	/* A racing creator may have won; if so *vpp holds its vnode. */
	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
	else
		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */
	if (vp->v_type != VFIFO) {
		/* FFS supports shared locking for all files except fifos. */
		VN_LOCK_ASHARE(vp);
	}

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP_SET(ip, i_gen, ip->i_gen);
		}
	}
#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_vnode_associate_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}
1612
1613/*
1614 * File handle to vnode
1615 *
1616 * Have to be really careful about stale file handles:
1617 * - check that the inode number is valid
1618 * - call ffs_vget() to get the locked inode
1619 * - check for an unallocated inode (i_mode == 0)
1620 * - check that the given client host has export rights and return
1621 * those rights via. exflagsp and credanonp
1622 */
1623static int
1624ffs_fhtovp(mp, fhp, vpp)
1625 struct mount *mp;
1626 struct fid *fhp;
1627 struct vnode **vpp;
1628{
1629 struct ufid *ufhp;
1630 struct fs *fs;
1631
1632 ufhp = (struct ufid *)fhp;
1633 fs = VFSTOUFS(mp)->um_fs;
1634 if (ufhp->ufid_ino < ROOTINO ||
1635 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1636 return (ESTALE);
1637 return (ufs_fhtovp(mp, ufhp, vpp));
1638}
1639
1640/*
1641 * Initialize the filesystem.
1642 */
1643static int
1644ffs_init(vfsp)
1645 struct vfsconf *vfsp;
1646{
1647
1648 softdep_initialize();
1649 return (ufs_init(vfsp));
1650}
1651
1652/*
1653 * Undo the work of ffs_init().
1654 */
1655static int
1656ffs_uninit(vfsp)
1657 struct vfsconf *vfsp;
1658{
1659 int ret;
1660
1661 ret = ufs_uninit(vfsp);
1662 softdep_uninitialize();
1663 return (ret);
1664}
1665
1666/*
1667 * Write a superblock and associated information back to disk.
1668 */
1669int
1670ffs_sbupdate(mp, waitfor, suspended)
1671 struct ufsmount *mp;
1672 int waitfor;
1673 int suspended;
1674{
1675 struct fs *fs = mp->um_fs;
1676 struct buf *sbbp;
1677 struct buf *bp;
1678 int blks;
1679 void *space;
1680 int i, size, error, allerror = 0;
1681
1682 if (fs->fs_ronly == 1 &&
1683 (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1684 (MNT_RDONLY | MNT_UPDATE))
1685 panic("ffs_sbupdate: write read-only filesystem");
1686 /*
1687 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1688 */
1689 sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1690 0, 0, 0);
1691 /*
1692 * First write back the summary information.
1693 */
1694 blks = howmany(fs->fs_cssize, fs->fs_fsize);
1695 space = fs->fs_csp;
1696 for (i = 0; i < blks; i += fs->fs_frag) {
1697 size = fs->fs_bsize;
1698 if (i + fs->fs_frag > blks)
1699 size = (blks - i) * fs->fs_fsize;
1700 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1701 size, 0, 0, 0);
1702 bcopy(space, bp->b_data, (u_int)size);
1703 space = (char *)space + size;
1704 if (suspended)
1705 bp->b_flags |= B_VALIDSUSPWRT;
1706 if (waitfor != MNT_WAIT)
1707 bawrite(bp);
1708 else if ((error = bwrite(bp)) != 0)
1709 allerror = error;
1710 }
1711 /*
1712 * Now write back the superblock itself. If any errors occurred
1713 * up to this point, then fail so that the superblock avoids
1714 * being written out as clean.
1715 */
1716 if (allerror) {
1717 brelse(sbbp);
1718 return (allerror);
1719 }
1720 bp = sbbp;
1721 if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1722 (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1723 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1724 fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1725 fs->fs_sblockloc = SBLOCK_UFS1;
1726 }
1727 if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1728 (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1729 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1730 fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1731 fs->fs_sblockloc = SBLOCK_UFS2;
1732 }
1733 fs->fs_fmod = 0;
1734 fs->fs_time = time_second;
1735 if (fs->fs_flags & FS_DOSOFTDEP)
1736 softdep_setup_sbupdate(mp, (struct fs *)bp->b_data, bp);
1737 bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1738 ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1739 if (suspended)
1740 bp->b_flags |= B_VALIDSUSPWRT;
1741 if (waitfor != MNT_WAIT)
1742 bawrite(bp);
1743 else if ((error = bwrite(bp)) != 0)
1744 allerror = error;
1745 return (allerror);
1746}
1747
/*
 * Extended-attribute control entry point: dispatch to the UFS
 * implementation when compiled with UFS_EXTATTR, otherwise to the
 * generic VFS stub.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
    int attrnamespace, const char *attrname)
{

#ifdef UFS_EXTATTR
	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname));
#else
	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname));
#endif
}
1761
/*
 * Release an in-core inode: free the flavor-specific dinode (UFS1 or
 * UFS2) back to its UMA zone, then the inode itself.
 */
static void
ffs_ifree(struct ufsmount *ump, struct inode *ip)
{

	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
		uma_zfree(uma_ufs1, ip->i_din1);
	else if (ip->i_din2 != NULL)
		uma_zfree(uma_ufs2, ip->i_din2);
	uma_zfree(uma_inode, ip);
}
1772
/* debug.dobkgrdwrite: toggles the background-write machinery below. */
static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
    "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1776
/*
 * Complete a background write started from bwrite.
 *
 * Called as the b_iodone handler of the temporary copy buffer created
 * by ffs_bufwrite().  Moves any unfinished soft updates dependencies
 * back to the original buffer, discards the copy, and clears
 * BV_BKGRDINPROG on the original so a blocked writer can proceed.
 */
static void
ffs_backgroundwritedone(struct buf *bp)
{
	struct bufobj *bufobj;
	struct buf *origbp;

	/*
	 * Find the original buffer that we are writing.
	 */
	bufobj = bp->b_bufobj;
	BO_LOCK(bufobj);
	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
		panic("backgroundwritedone: lost buffer");
	/* Grab an extra reference to be dropped by the bufdone() below. */
	bufobj_wrefl(bufobj);
	BO_UNLOCK(bufobj);
	/*
	 * Process dependencies then return any unfinished ones.
	 */
	if (!LIST_EMPTY(&bp->b_dep))
		buf_complete(bp);
#ifdef SOFTUPDATES
	/* Anything still pending is handed back to the original buffer. */
	if (!LIST_EMPTY(&bp->b_dep))
		softdep_move_dependencies(bp, origbp);
#endif
	/*
	 * This buffer is marked B_NOCACHE so when it is released
	 * by biodone it will be tossed.
	 */
	bp->b_flags |= B_NOCACHE;
	bp->b_flags &= ~B_CACHE;
	bufdone(bp);
	BO_LOCK(bufobj);
	/*
	 * Clear the BV_BKGRDINPROG flag in the original buffer
	 * and awaken it if it is waiting for the write to complete.
	 * If BV_BKGRDINPROG is not set in the original buffer it must
	 * have been released and re-instantiated - which is not legal.
	 */
	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
	    ("backgroundwritedone: lost buffer2"));
	origbp->b_vflags &= ~BV_BKGRDINPROG;
	if (origbp->b_vflags & BV_BKGRDWAIT) {
		origbp->b_vflags &= ~BV_BKGRDWAIT;
		/* Wakeup channel matches the msleep() in ffs_bufwrite(). */
		wakeup(&origbp->b_xflags);
	}
	BO_UNLOCK(bufobj);
}
1828
1829
1830/*
1831 * Write, release buffer on completion. (Done by iodone
1832 * if async). Do not bother writing anything if the buffer
1833 * is invalid.
1834 *
1835 * Note that we set B_CACHE here, indicating that buffer is
1836 * fully valid and thus cacheable. This is true even of NFS
1837 * now so we set it generally. This could be set either here
1838 * or in biodone() since the I/O is synchronous. We put it
1839 * here.
1840 */
1841static int
1842ffs_bufwrite(struct buf *bp)
1843{
1844 int oldflags, s;
1845 struct buf *newbp;
1846
1847 CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
1848 if (bp->b_flags & B_INVAL) {
1849 brelse(bp);
1850 return (0);
1851 }
1852
1853 oldflags = bp->b_flags;
1854
1855 if (!BUF_ISLOCKED(bp))
1856 panic("bufwrite: buffer is not busy???");
1857 s = splbio();
1858 /*
1859 * If a background write is already in progress, delay
1860 * writing this block if it is asynchronous. Otherwise
1861 * wait for the background write to complete.
1862 */
1863 BO_LOCK(bp->b_bufobj);
1864 if (bp->b_vflags & BV_BKGRDINPROG) {
1865 if (bp->b_flags & B_ASYNC) {
1866 BO_UNLOCK(bp->b_bufobj);
1867 splx(s);
1868 bdwrite(bp);
1869 return (0);
1870 }
1871 bp->b_vflags |= BV_BKGRDWAIT;
1872 msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
1873 if (bp->b_vflags & BV_BKGRDINPROG)
1874 panic("bufwrite: still writing");
1875 }
1876 BO_UNLOCK(bp->b_bufobj);
1877
1878 /*
1879 * If this buffer is marked for background writing and we
1880 * do not have to wait for it, make a copy and write the
1881 * copy so as to leave this buffer ready for further use.
1882 *
1883 * This optimization eats a lot of memory. If we have a page
1884 * or buffer shortfall we can't do it.
1885 */
1886 if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
1887 (bp->b_flags & B_ASYNC) &&
1888 !vm_page_count_severe() &&
1889 !buf_dirty_count_severe()) {
1890 KASSERT(bp->b_iodone == NULL,
1891 ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
1892
1893 /* get a new block */
1894 newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
1895 if (newbp == NULL)
1896 goto normal_write;
1897
1898 /*
1899 * set it to be identical to the old block. We have to
1900 * set b_lblkno and BKGRDMARKER before calling bgetvp()
1901 * to avoid confusing the splay tree and gbincore().
1902 */
1903 memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
1904 newbp->b_lblkno = bp->b_lblkno;
1905 newbp->b_xflags |= BX_BKGRDMARKER;
1906 BO_LOCK(bp->b_bufobj);
1907 bp->b_vflags |= BV_BKGRDINPROG;
1908 bgetvp(bp->b_vp, newbp);
1909 BO_UNLOCK(bp->b_bufobj);
1910 newbp->b_bufobj = &bp->b_vp->v_bufobj;
1911 newbp->b_blkno = bp->b_blkno;
1912 newbp->b_offset = bp->b_offset;
1913 newbp->b_iodone = ffs_backgroundwritedone;
1914 newbp->b_flags |= B_ASYNC;
1915 newbp->b_flags &= ~B_INVAL;
1916
1917#ifdef SOFTUPDATES
1918 /*
1919 * Move over the dependencies. If there are rollbacks,
1920 * leave the parent buffer dirtied as it will need to
1921 * be written again.
1922 */
1923 if (LIST_EMPTY(&bp->b_dep) ||
1924 softdep_move_dependencies(bp, newbp) == 0)
1925 bundirty(bp);
1926#else
1927 bundirty(bp);
1928#endif
1929
1930 /*
1931 * Initiate write on the copy, release the original to
1932 * the B_LOCKED queue so that it cannot go away until
1933 * the background write completes. If not locked it could go
1934 * away and then be reconstituted while it was being written.
1935 * If the reconstituted buffer were written, we could end up
1936 * with two background copies being written at the same time.
1937 */
1938 bqrelse(bp);
1939 bp = newbp;
1940 } else
1941 /* Mark the buffer clean */
1942 bundirty(bp);
1943
1944
1945 /* Let the normal bufwrite do the rest for us */
1946normal_write:
1947 return (bufwrite(bp));
1948}
1949
1950
/*
 * bufobj strategy routine for FFS devices.  Before handing a write to
 * GEOM it (1) sanity-checks writes against a suspended filesystem,
 * (2) runs snapshot copy-on-write via ffs_copyonwrite() when the vnode
 * is marked VV_COPYONWRITE and snapshot data is present, and (3) lets
 * soft updates start any buffered dependencies.  Cluster buffers are
 * handled member-by-member in both (2) and (3).
 */
static void
ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
{
	struct vnode *vp;
	int error;
	struct buf *tbp;
	int nocopy;

	vp = bo->__bo_vnode;
	if (bp->b_iocmd == BIO_WRITE) {
		/*
		 * Writes to a suspended filesystem must carry
		 * B_VALIDSUSPWRT; anything else is a bug.
		 */
		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
			panic("ffs_geom_strategy: bad I/O");
		/* B_NOCOPY suppresses copy-on-write for this buffer. */
		nocopy = bp->b_flags & B_NOCOPY;
		bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
		if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
		    vp->v_rdev->si_snapdata != NULL) {
			if ((bp->b_flags & B_CLUSTER) != 0) {
				/*
				 * Temporarily drop the runningbufspace
				 * accounting while copy-on-write runs,
				 * then re-add it before the real I/O.
				 */
				runningbufwakeup(bp);
				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
				    b_cluster.cluster_entry) {
					error = ffs_copyonwrite(vp, tbp);
					if (error != 0 &&
					    error != EOPNOTSUPP) {
						bp->b_error = error;
						bp->b_ioflags |= BIO_ERROR;
						bufdone(bp);
						return;
					}
				}
				bp->b_runningbufspace = bp->b_bufsize;
				atomic_add_long(&runningbufspace,
				    bp->b_runningbufspace);
			} else {
				error = ffs_copyonwrite(vp, bp);
				if (error != 0 && error != EOPNOTSUPP) {
					bp->b_error = error;
					bp->b_ioflags |= BIO_ERROR;
					bufdone(bp);
					return;
				}
			}
		}
#ifdef SOFTUPDATES
		/* Kick off soft updates processing for the write. */
		if ((bp->b_flags & B_CLUSTER) != 0) {
			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
			    b_cluster.cluster_entry) {
				if (!LIST_EMPTY(&tbp->b_dep))
					buf_start(tbp);
			}
		} else {
			if (!LIST_EMPTY(&bp->b_dep))
				buf_start(bp);
		}

#endif
	}
	g_vfs_strategy(bo, bp);
}
2011
2012#ifdef DDB
2013
/*
 * Dump one ufsmount for the DDB "show ffs" command: mount pointer,
 * mount point path, device vnode, superblock, and the soft updates
 * worklist/dependency counters.
 */
static void
db_print_ffs(struct ufsmount *ump)
{
	db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d "
	    "su_req %d\n",
	    ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
	    ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
	    ump->softdep_on_worklist_inprogress, ump->softdep_deps,
	    ump->softdep_req);
}
2024
2025DB_SHOW_COMMAND(ffs, db_show_ffs)
2026{
2027 struct mount *mp;
2028 struct ufsmount *ump;
2029
2030 if (have_addr) {
2031 ump = VFSTOUFS((struct mount *)addr);
2032 db_print_ffs(ump);
2033 return;
2034 }
2035
2036 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2037 if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
2038 db_print_ffs(VFSTOUFS(mp));
2039 }
2040}
2041
2042#endif /* DDB */
478 error = kernel_mount(ma, flags);
479
480 return (error);
481}
482
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all inactive vnodes.
 *	5) invalidate all cached file data.
 *	6) re-read inode data for all active vnodes.
 *
 * Returns 0 on success or an errno (EINVAL for a non-read-only mount
 * or an unrecognizable on-disk superblock, or a bread() error).
 */
static int
ffs_reload(struct mount *mp, struct thread *td)
{
	struct vnode *vp, *mvp, *devvp;
	struct inode *ip;
	void *space;
	struct buf *bp;
	struct fs *fs, *newfs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
	int i, blks, size, error;
	int32_t *lp;

	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EINVAL);
	ump = VFSTOUFS(mp);
	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = VFSTOUFS(mp)->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	if (vinvalbuf(devvp, 0, 0, 0) != 0)
		panic("ffs_reload: dirty1");
	VOP_UNLOCK(devvp, 0);

	/*
	 * Step 2: re-read superblock from disk.
	 */
	fs = VFSTOUFS(mp)->um_fs;
	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
	    NOCRED, &bp)) != 0)
		return (error);
	newfs = (struct fs *)bp->b_data;
	/* Reject superblocks with a bad magic or implausible block size. */
	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
	     newfs->fs_magic != FS_UFS2_MAGIC) ||
	    newfs->fs_bsize > MAXBSIZE ||
	    newfs->fs_bsize < sizeof(struct fs)) {
		brelse(bp);
		return (EIO);		/* XXX needs translation */
	}
	/*
	 * Copy pointer fields back into superblock before copying in	XXX
	 * new superblock. These should really be in the ufsmount.	XXX
	 * Note that important parameters (eg fs_ncg) are unchanged.
	 */
	newfs->fs_csp = fs->fs_csp;
	newfs->fs_maxcluster = fs->fs_maxcluster;
	newfs->fs_contigdirs = fs->fs_contigdirs;
	newfs->fs_active = fs->fs_active;
	/* The file system is still read-only. */
	newfs->fs_ronly = 1;
	sblockloc = fs->fs_sblockloc;
	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
	brelse(bp);
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
	UFS_LOCK(ump);
	/* Stale pending counts from an unclean shutdown are discarded. */
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: reload pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);

	/*
	 * Step 3: re-read summary information from disk.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    NOCRED, &bp);
		if (error)
			return (error);
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
	}
	/*
	 * We no longer know anything about clusters per cylinder group.
	 */
	if (fs->fs_contigsumsize > 0) {
		lp = fs->fs_maxcluster;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}

loop:
	MNT_ILOCK(mp);
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		/*
		 * Step 4: invalidate all cached file data.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			/* vget failed: restart the whole scan. */
			MNT_VNODE_FOREACH_ABORT(mp, mvp);
			goto loop;
		}
		if (vinvalbuf(vp, 0, 0, 0))
			panic("ffs_reload: dirty2");
		/*
		 * Step 5: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error =
		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			VOP_UNLOCK(vp, 0);
			vrele(vp);
			MNT_VNODE_FOREACH_ABORT(mp, mvp);
			return (error);
		}
		ffs_load_inode(bp, ip, fs, ip->i_number);
		ip->i_effnlink = ip->i_nlink;
		brelse(bp);
		VOP_UNLOCK(vp, 0);
		vrele(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (0);
}
629
/*
 * Possible superblock locations ordered from most to least likely,
 * terminated by a -1 sentinel (consumed by the search loop in
 * ffs_mountfs()).
 */
static int sblock_try[] = SBLOCKSEARCH;
634
/*
 * Common code for mount and mountroot
 *
 * Opens the device through GEOM, locates and validates the superblock,
 * allocates and initializes the ufsmount and in-core superblock copy,
 * reads the cylinder group summary information, and wires up the mount
 * structure.  On any failure everything acquired so far is unwound at
 * the "out:" label and an errno is returned.
 */
static int
ffs_mountfs(devvp, mp, td)
	struct vnode *devvp;
	struct mount *mp;
	struct thread *td;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct fs *fs;
	struct cdev *dev;
	void *space;
	ufs2_daddr_t sblockloc;
	int error, i, blks, size, ronly;
	int32_t *lp;
	struct ucred *cred;
	struct g_consumer *cp;
	struct mount *nmp;

	bp = NULL;
	ump = NULL;
	cred = td ? td->td_ucred : NOCRED;
	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;

	/* Hold a reference on the device for the lifetime of the mount. */
	dev = devvp->v_rdev;
	dev_ref(dev);
	DROP_GIANT();
	g_topology_lock();
	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);

	/*
	 * If we are a root mount, drop the E flag so fsck can do its magic.
	 * We will pick it up again when we remount R/W.
	 */
	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
		error = g_access(cp, 0, 0, -1);
	g_topology_unlock();
	PICKUP_GIANT();
	VOP_UNLOCK(devvp, 0);
	if (error)
		goto out;
	if (devvp->v_rdev->si_iosize_max != 0)
		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
	if (mp->mnt_iosize_max > MAXPHYS)
		mp->mnt_iosize_max = MAXPHYS;

	/* Route buffer I/O on the device through the FFS bufobj ops. */
	devvp->v_bufobj.bo_ops = &ffs_ops;

	fs = NULL;
	sblockloc = 0;
	/*
	 * Try reading the superblock in each of its possible locations.
	 */
	for (i = 0; sblock_try[i] != -1; i++) {
		if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
			error = EINVAL;
			vfs_mount_error(mp,
			    "Invalid sectorsize %d for superblock size %d",
			    cp->provider->sectorsize, SBLOCKSIZE);
			goto out;
		}
		if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
		    cred, &bp)) != 0)
			goto out;
		fs = (struct fs *)bp->b_data;
		sblockloc = sblock_try[i];
		/*
		 * Accept a candidate with a valid magic and plausible
		 * block size; for UFS2 the recorded superblock location
		 * must match unless the flags were never updated.
		 */
		if ((fs->fs_magic == FS_UFS1_MAGIC ||
		    (fs->fs_magic == FS_UFS2_MAGIC &&
		    (fs->fs_sblockloc == sblockloc ||
		    (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
		    fs->fs_bsize <= MAXBSIZE &&
		    fs->fs_bsize >= sizeof(struct fs))
			break;
		brelse(bp);
		bp = NULL;
	}
	if (sblock_try[i] == -1) {
		error = EINVAL;		/* XXX needs translation */
		goto out;
	}
	fs->fs_fmod = 0;
	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
	fs->fs_flags &= ~FS_UNCLEAN;
	if (fs->fs_clean == 0) {
		fs->fs_flags |= FS_UNCLEAN;
		/*
		 * An unclean filesystem may be mounted read-only, forced,
		 * or (soft updates, no journal/fsck needed) with a warning;
		 * otherwise refuse a R/W mount.
		 */
		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
		    ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
		     (fs->fs_flags & FS_DOSOFTDEP))) {
			printf("WARNING: %s was not properly dismounted\n",
			    fs->fs_fsmnt);
		} else {
			printf(
"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
			    fs->fs_fsmnt);
			if (fs->fs_flags & FS_SUJ)
				printf(
"WARNING: Forced mount will invalidate journal contents\n");
			error = EPERM;
			goto out;
		}
		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
		    (mp->mnt_flag & MNT_FORCE)) {
			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
			    (intmax_t)fs->fs_pendingblocks,
			    fs->fs_pendinginodes);
			fs->fs_pendingblocks = 0;
			fs->fs_pendinginodes = 0;
		}
	}
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: mount pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
#ifdef UFS_GJOURNAL
		/*
		 * Get journal provider name.
		 */
		size = 1024;
		mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
		if (g_io_getattr("GJOURNAL::provider", cp, &size,
		    mp->mnt_gjprovider) == 0) {
			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
			    M_UFSMNT, M_WAITOK);
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_GJOURNAL;
			MNT_IUNLOCK(mp);
		} else {
			printf(
"WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
			    mp->mnt_stat.f_mntonname);
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
#else
		printf(
"WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	} else {
		mp->mnt_gjprovider = NULL;
	}
	/* Allocate and populate the per-mount state. */
	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
	ump->um_cp = cp;
	ump->um_bo = &devvp->v_bufobj;
	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_fstype = UFS1;
		ump->um_balloc = ffs_balloc_ufs1;
	} else {
		ump->um_fstype = UFS2;
		ump->um_balloc = ffs_balloc_ufs2;
	}
	ump->um_blkatoff = ffs_blkatoff;
	ump->um_truncate = ffs_truncate;
	ump->um_update = ffs_update;
	ump->um_valloc = ffs_valloc;
	ump->um_vfree = ffs_vfree;
	ump->um_ifree = ffs_ifree;
	ump->um_rdonly = ffs_rdonly;
	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
	/* Keep a private in-core copy of the superblock. */
	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
	if (fs->fs_sbsize < SBLOCKSIZE)
		bp->b_flags |= B_INVAL | B_NOCACHE;
	brelse(bp);
	bp = NULL;
	fs = ump->um_fs;
	ffs_oldfscompat_read(fs, ump, sblockloc);
	fs->fs_ronly = ronly;
	/*
	 * Read the summary information plus room for the per-cg cluster
	 * summary (if any) and the contiguous-directory counters.
	 */
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(u_int8_t);
	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
	fs->fs_csp = space;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    cred, &bp)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
		bp = NULL;
	}
	if (fs->fs_contigsumsize > 0) {
		fs->fs_maxcluster = lp = space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = lp;
	}
	size = fs->fs_ncg * sizeof(u_int8_t);
	fs->fs_contigdirs = (u_int8_t *)space;
	bzero(fs->fs_contigdirs, size);
	fs->fs_active = NULL;
	mp->mnt_data = ump;
	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
	nmp = NULL;
	/* Generate a new fsid if the recorded one is unset or in use. */
	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
		if (nmp)
			vfs_rel(nmp);
		vfs_getnewfsid(mp);
	}
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	MNT_IUNLOCK(mp);
	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
#ifdef MAC
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_MULTILABEL;
		MNT_IUNLOCK(mp);
#else
		printf(
"WARNING: %s: multilabel flag on fs but no MAC support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_ACLS) != 0) {
#ifdef UFS_ACL
		MNT_ILOCK(mp);

		/* POSIX.1e and NFSv4 ACLs are mutually exclusive. */
		if (mp->mnt_flag & MNT_NFS4ACLS)
			printf("WARNING: ACLs flag on fs conflicts with "
			    "\"nfsv4acls\" mount option; option ignored\n");
		mp->mnt_flag &= ~MNT_NFS4ACLS;
		mp->mnt_flag |= MNT_ACLS;

		MNT_IUNLOCK(mp);
#else
		printf("WARNING: %s: ACLs flag on fs but no ACLs support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
#ifdef UFS_ACL
		MNT_ILOCK(mp);

		if (mp->mnt_flag & MNT_ACLS)
			printf("WARNING: NFSv4 ACLs flag on fs conflicts with "
			    "\"acls\" mount option; option ignored\n");
		mp->mnt_flag &= ~MNT_ACLS;
		mp->mnt_flag |= MNT_NFS4ACLS;

		MNT_IUNLOCK(mp);
#else
		printf(
"WARNING: %s: NFSv4 ACLs flag on fs but no ACLs support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}

	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		ump->um_quotas[i] = NULLVP;
#ifdef UFS_EXTATTR
	ufs_extattr_uepm_init(&ump->um_extattr);
#endif
	/*
	 * Set FS local "last mounted on" information (NULL pad)
	 */
	bzero(fs->fs_fsmnt, MAXMNTLEN);
	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
	mp->mnt_stat.f_iosize = fs->fs_bsize;

	if( mp->mnt_flag & MNT_ROOTFS) {
		/*
		 * Root mount; update timestamp in mount structure.
		 * this will be used by the common root mount code
		 * to update the system clock.
		 */
		mp->mnt_time = fs->fs_time;
	}

	if (ronly == 0) {
		fs->fs_mtime = time_second;
		if ((fs->fs_flags & FS_DOSOFTDEP) &&
		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
			free(fs->fs_csp, M_UFSMNT);
			goto out;
		}
		if (fs->fs_snapinum[0] != 0)
			ffs_snapshot_mount(mp);
		/* Mark the filesystem dirty until a clean unmount. */
		fs->fs_fmod = 1;
		fs->fs_clean = 0;
		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
	}
	/*
	 * Initialize filesystem stat information in mount struct.
	 */
	MNT_ILOCK(mp);
	mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
	    MNTK_EXTENDED_SHARED;
	MNT_IUNLOCK(mp);
#ifdef UFS_EXTATTR
#ifdef UFS_EXTATTR_AUTOSTART
	/*
	 *
	 * Auto-starting does the following:
	 *	- check for /.attribute in the fs, and extattr_start if so
	 *	- for each file in .attribute, enable that file with
	 * 	  an attribute of the same name.
	 * Not clear how to report errors -- probably eat them.
	 * This would all happen while the filesystem was busy/not
	 * available, so would effectively be "atomic".
	 */
	(void) ufs_extattr_autostart(mp, td);
#endif /* !UFS_EXTATTR_AUTOSTART */
#endif /* !UFS_EXTATTR */
	return (0);
out:
	/* Error unwind: buffer, GEOM consumer, ufsmount, device ref. */
	if (bp)
		brelse(bp);
	/* NOTE(review): assumes g_vfs_open() NULLs cp on failure — confirm. */
	if (cp != NULL) {
		DROP_GIANT();
		g_topology_lock();
		g_vfs_close(cp);
		g_topology_unlock();
		PICKUP_GIANT();
	}
	if (ump) {
		mtx_destroy(UFS_MTX(ump));
		if (mp->mnt_gjprovider != NULL) {
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
		free(ump->um_fs, M_UFSMNT);
		free(ump, M_UFSMNT);
		mp->mnt_data = NULL;
	}
	dev_rel(dev);
	return (error);
}
985
#include <sys/sysctl.h>
/*
 * Debug knob (debug.bigcgs): when set, ffs_oldfscompat_read() inflates
 * fs_cgsize to a full block (saved/restored via fs_save_cgsize).
 */
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
989
/*
 * Sanity checks for loading old filesystem superblocks.
 * See ffs_oldfscompat_write below for unwound actions.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 *
 * Migrates legacy on-disk superblock fields into their modern, wider
 * equivalents and clamps/derives values older filesystems left unset.
 * Called once per mount/reload with the superblock's actual location.
 */
static void
ffs_oldfscompat_read(fs, ump, sblockloc)
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
{
	off_t maxfilesize;

	/*
	 * If not yet done, update fs_flags location and value of fs_sblockloc.
	 */
	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
		fs->fs_flags = fs->fs_old_flags;
		fs->fs_old_flags |= FS_FLAGS_UPDATED;
		fs->fs_sblockloc = sblockloc;
	}
	/*
	 * If not yet done, update UFS1 superblock with new wider fields.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
		fs->fs_maxbsize = fs->fs_bsize;
		fs->fs_time = fs->fs_old_time;
		fs->fs_size = fs->fs_old_size;
		fs->fs_dsize = fs->fs_old_dsize;
		fs->fs_csaddr = fs->fs_old_csaddr;
		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
	}
	/* Pre-4.4BSD inode format: derive limits the old format lacked. */
	if (fs->fs_magic == FS_UFS1_MAGIC &&
	    fs->fs_old_inodefmt < FS_44INODEFMT) {
		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
		fs->fs_qbmask = ~fs->fs_bmask;
		fs->fs_qfmask = ~fs->fs_fmask;
	}
	/*
	 * Clamp UFS1 fs_maxfilesize; the original value is saved so
	 * ffs_oldfscompat_write() can restore it before writing back.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
		if (fs->fs_maxfilesize > maxfilesize)
			fs->fs_maxfilesize = maxfilesize;
	}
	/* Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	if (bigcgs) {
		fs->fs_save_cgsize = fs->fs_cgsize;
		fs->fs_cgsize = fs->fs_bsize;
	}
}
1049
/*
 * Unwinding superblock updates for old filesystems.
 * See ffs_oldfscompat_read above for details.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 *
 * Called before a superblock is written back to disk so that legacy
 * (UFS1) on-disk fields reflect the current in-core values.
 */
void
ffs_oldfscompat_write(fs, ump)
	struct fs *fs;
	struct ufsmount *ump;
{

	/*
	 * Copy back UFS2 updated fields that UFS1 inspects.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		fs->fs_old_time = fs->fs_time;
		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
		/* Undo the clamp applied in ffs_oldfscompat_read(). */
		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
	}
	if (bigcgs) {
		fs->fs_cgsize = fs->fs_save_cgsize;
		fs->fs_save_cgsize = 0;
	}
}
1079
/*
 * unmount system call
 *
 * Stops extended attributes, optionally suspends the filesystem for a
 * forced unmount of a read-write volume, flushes all files and the
 * superblock, then releases the GEOM consumer, device, and per-mount
 * state.  On failure the suspension and extattr state are restored.
 */
static int
ffs_unmount(mp, mntflags)
	struct mount *mp;
	int mntflags;
{
	struct thread *td;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, flags, susp;
#ifdef UFS_EXTATTR
	int e_restart;
#endif

	flags = 0;
	td = curthread;
	fs = ump->um_fs;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		/* Forced unmount of a R/W fs suspends writes first. */
		susp = fs->fs_ronly != 0;
	} else
		susp = 0;
#ifdef UFS_EXTATTR
	if ((error = ufs_extattr_stop(mp, td))) {
		if (error != EOPNOTSUPP)
			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
			    error);
		e_restart = 0;
	} else {
		ufs_extattr_uepm_destroy(&ump->um_extattr);
		e_restart = 1;
	}
#endif
	if (susp) {
		/*
		 * dounmount already called vn_start_write().
		 */
		for (;;) {
			vn_finished_write(mp);
			if ((error = vfs_write_suspend(mp)) != 0)
				return (error);
			MNT_ILOCK(mp);
			if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
				mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
				    MNTK_SUSPEND2);
				wakeup(&mp->mnt_flag);
				MNT_IUNLOCK(mp);
				/* Let this thread write despite suspension. */
				td->td_pflags |= TDP_IGNSUSP;
				break;
			}
			MNT_IUNLOCK(mp);
			vn_start_write(NULL, &mp, V_WAIT);
		}
	}
	if (mp->mnt_flag & MNT_SOFTDEP)
		error = softdep_flushfiles(mp, flags, td);
	else
		error = ffs_flushfiles(mp, flags, td);
	/* ENXIO (device gone) is not fatal for the remaining teardown. */
	if (error != 0 && error != ENXIO)
		goto fail;

	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("%s: unmount pending error: blocks %jd files %d\n",
		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);
	softdep_unmount(mp);
	if (fs->fs_ronly == 0) {
		/* Mark clean only if nothing needs fsck. */
		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
		error = ffs_sbupdate(ump, MNT_WAIT, 0);
		if (error && error != ENXIO) {
			fs->fs_clean = 0;
			goto fail;
		}
	}
	if (susp) {
		vfs_write_resume(mp);
		vn_start_write(NULL, &mp, V_WAIT);
	}
	DROP_GIANT();
	g_topology_lock();
	g_vfs_close(ump->um_cp);
	g_topology_unlock();
	PICKUP_GIANT();
	vrele(ump->um_devvp);
	dev_rel(ump->um_dev);
	mtx_destroy(UFS_MTX(ump));
	if (mp->mnt_gjprovider != NULL) {
		free(mp->mnt_gjprovider, M_UFSMNT);
		mp->mnt_gjprovider = NULL;
	}
	free(fs->fs_csp, M_UFSMNT);
	free(fs, M_UFSMNT);
	free(ump, M_UFSMNT);
	mp->mnt_data = NULL;
	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);
	return (error);

fail:
	/* Undo the suspension and restart extattrs so the fs stays usable. */
	if (susp) {
		vfs_write_resume(mp);
		vn_start_write(NULL, &mp, V_WAIT);
	}
#ifdef UFS_EXTATTR
	if (e_restart) {
		ufs_extattr_uepm_init(&ump->um_extattr);
#ifdef UFS_EXTATTR_AUTOSTART
		(void) ufs_extattr_autostart(mp, td);
#endif
	}
#endif

	return (error);
}
1202
/*
 * Flush out all the files in a filesystem.
 *
 * Runs extra vflush() passes to clear quota and snapshot system vnodes
 * before the final pass, then fsyncs the device vnode to push the
 * filesystem metadata.  Returns 0 or the first vflush/VOP_FSYNC error.
 */
int
ffs_flushfiles(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct ufsmount *ump;
	int error;

	ump = VFSTOUFS(mp);
#ifdef QUOTA
	if (mp->mnt_flag & MNT_QUOTA) {
		int i;
		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
		if (error)
			return (error);
		for (i = 0; i < MAXQUOTAS; i++) {
			quotaoff(td, mp, i);
		}
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
#endif
	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
			return (error);
		ffs_snapshot_unmount(mp);
		flags |= FORCECLOSE;
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
        /*
	 * Flush all the files.
	 */
	if ((error = vflush(mp, 0, flags, td)) != 0)
		return (error);
	/*
	 * Flush filesystem metadata.
	 */
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
	VOP_UNLOCK(ump->um_devvp, 0);
	return (error);
}
1255
/*
 * Get filesystem statistics.
 *
 * Fills *sbp from the in-core superblock; the free-space and free-inode
 * figures are read under UFS_LOCK so they are mutually consistent, and
 * pending (not-yet-released) blocks/inodes are counted as free.
 */
static int
ffs_statfs(mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{
	struct ufsmount *ump;
	struct fs *fs;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_statfs");
	sbp->f_version = STATFS_VERSION;
	sbp->f_bsize = fs->fs_fsize;
	sbp->f_iosize = fs->fs_bsize;
	sbp->f_blocks = fs->fs_dsize;
	UFS_LOCK(ump);
	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
	    dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
	UFS_UNLOCK(ump);
	sbp->f_namemax = NAME_MAX;
	return (0);
}
1286
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
static int
ffs_sync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *mvp, *vp, *devvp;
	struct thread *td;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, wait, lockreq, allerror = 0;
	int suspend;		/* caller asked for MNT_SUSPEND */
	int suspended;		/* we actually marked the mount suspended */
	int secondary_writes;
	int secondary_accwrites;
	int softdep_deps;
	int softdep_accdeps;
	struct bufobj *bo;

	td = curthread;
	fs = ump->um_fs;
	/* A modified superblock on a read-only mount indicates corruption. */
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.  MNT_SUSPEND is handled as a
	 * synchronous sync (MNT_WAIT) that additionally marks the mount
	 * suspended once no further work remains.
	 */
	wait = 0;
	suspend = 0;
	suspended = 0;
	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
	if (waitfor == MNT_SUSPEND) {
		suspend = 1;
		waitfor = MNT_WAIT;
	}
	if (waitfor == MNT_WAIT) {
		wait = 1;
		lockreq = LK_EXCLUSIVE;
	}
	lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
	MNT_ILOCK(mp);
loop:
	/* Grab snapshot of secondary write counts */
	secondary_writes = mp->mnt_secondary_writes;
	secondary_accwrites = mp->mnt_secondary_accwrites;

	/* Grab snapshot of softdep dependency counts */
	MNT_IUNLOCK(mp);
	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
	MNT_ILOCK(mp);

	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/*
		 * Depend on the mntvnode_slock to keep things stable enough
		 * for a quick test. Since there might be hundreds of
		 * thousands of vnodes, we cannot afford even a subroutine
		 * call unless there's a good chance that we have work to do.
		 */
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			continue;
		}
		ip = VTOI(vp);
		/* Skip vnodes with neither dirty inode flags nor dirty bufs. */
		if (vp->v_type == VNON || ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			/*
			 * The vnode was recycled, or LK_SLEEPFAIL tripped;
			 * the iterator may be stale, so restart the scan.
			 */
			if (error == ENOENT || error == ENOLCK) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		if ((error = ffs_syncvnode(vp, waitfor)) != 0)
			allerror = error;
		vput(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count) {
			MNT_ILOCK(mp);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	devvp = ump->um_devvp;
	bo = &devvp->v_bufobj;
	BO_LOCK(bo);
	if (waitfor != MNT_LAZY &&
	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
		BO_UNLOCK(bo);
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(devvp, 0);
		/* The device fsync may have generated more work; rescan. */
		if (allerror == 0 && waitfor == MNT_WAIT) {
			MNT_ILOCK(mp);
			goto loop;
		}
	} else if (suspend != 0) {
		/*
		 * No device I/O outstanding.  Compare against the counts
		 * snapshotted at `loop' to verify no new writes or softdep
		 * dependencies appeared while scanning; if any did, more
		 * flushing is needed before suspension can complete.
		 */
		if (softdep_check_suspend(mp,
					  devvp,
					  softdep_deps,
					  softdep_accdeps,
					  secondary_writes,
					  secondary_accwrites) != 0)
			goto loop;	/* More work needed */
		mtx_assert(MNT_MTX(mp), MA_OWNED);
		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
		MNT_IUNLOCK(mp);
		suspended = 1;
	} else
		BO_UNLOCK(bo);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 &&
	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
		allerror = error;
	return (allerror);
}
1431
1432int
1433ffs_vget(mp, ino, flags, vpp)
1434 struct mount *mp;
1435 ino_t ino;
1436 int flags;
1437 struct vnode **vpp;
1438{
1439 return (ffs_vgetf(mp, ino, flags, vpp, 0));
1440}
1441
/*
 * Look up the vnode for inode number `ino' on mount `mp', creating and
 * initializing it from disk if it is not already cached.  `flags' are
 * lockmgr flags for the returned vnode; `ffs_flags' may contain
 * FFSV_FORCEINSMQ to force insertion onto the mount's vnode list.
 * On success the locked vnode is returned in *vpp; on failure *vpp is
 * set to NULL and an errno value is returned.
 */
int
ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
	int ffs_flags;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct cdev *dev;
	int error;

	/* Fast path: the vnode may already be hashed and usable. */
	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation. This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_fs;

	/*
	 * If this malloc() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
	else
		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	/*
	 * FFS supports recursive locking.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	VN_LOCK_AREC(vp);
	vp->v_data = ip;
	vp->v_bufobj.bo_bsize = fs->fs_bsize;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs;
	ip->i_dev = dev;
	ip->i_number = ino;
	ip->i_ea_refs = 0;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif

	if (ffs_flags & FFSV_FORCEINSMQ)
		vp->v_vflag |= VV_FORCEINSMQ;
	/*
	 * NOTE(review): on insmntque() failure the vnode itself is assumed
	 * to be disposed of by insmntque() while `ip' is freed here --
	 * confirm that the reclaim path cannot also free vp->v_data.
	 */
	error = insmntque(vp, mp);
	if (error != 0) {
		uma_zfree(uma_inode, ip);
		*vpp = NULL;
		return (error);
	}
	vp->v_vflag &= ~VV_FORCEINSMQ;
	/* If we lost the creation race, *vpp is the winner's vnode. */
	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
	else
		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */
	if (vp->v_type != VFIFO) {
		/* FFS supports shared locking for all files except fifos. */
		VN_LOCK_ASHARE(vp);
	}

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP_SET(ip, i_gen, ip->i_gen);
		}
	}
#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_vnode_associate_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}
1614
1615/*
1616 * File handle to vnode
1617 *
1618 * Have to be really careful about stale file handles:
1619 * - check that the inode number is valid
1620 * - call ffs_vget() to get the locked inode
1621 * - check for an unallocated inode (i_mode == 0)
1622 * - check that the given client host has export rights and return
 * those rights via exflagsp and credanonp
1624 */
1625static int
1626ffs_fhtovp(mp, fhp, vpp)
1627 struct mount *mp;
1628 struct fid *fhp;
1629 struct vnode **vpp;
1630{
1631 struct ufid *ufhp;
1632 struct fs *fs;
1633
1634 ufhp = (struct ufid *)fhp;
1635 fs = VFSTOUFS(mp)->um_fs;
1636 if (ufhp->ufid_ino < ROOTINO ||
1637 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1638 return (ESTALE);
1639 return (ufs_fhtovp(mp, ufhp, vpp));
1640}
1641
/*
 * Filesystem initialization hook: bring up the soft updates machinery
 * first, then let the generic UFS layer finish.
 */
static int
ffs_init(struct vfsconf *vfsp)
{

	softdep_initialize();
	return (ufs_init(vfsp));
}
1653
/*
 * Undo the work of ffs_init(): tear down the generic UFS layer, then the
 * soft updates machinery, in the reverse of initialization order.
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	int error;

	error = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (error);
}
1667
1668/*
1669 * Write a superblock and associated information back to disk.
1670 */
1671int
1672ffs_sbupdate(mp, waitfor, suspended)
1673 struct ufsmount *mp;
1674 int waitfor;
1675 int suspended;
1676{
1677 struct fs *fs = mp->um_fs;
1678 struct buf *sbbp;
1679 struct buf *bp;
1680 int blks;
1681 void *space;
1682 int i, size, error, allerror = 0;
1683
1684 if (fs->fs_ronly == 1 &&
1685 (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1686 (MNT_RDONLY | MNT_UPDATE))
1687 panic("ffs_sbupdate: write read-only filesystem");
1688 /*
1689 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1690 */
1691 sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1692 0, 0, 0);
1693 /*
1694 * First write back the summary information.
1695 */
1696 blks = howmany(fs->fs_cssize, fs->fs_fsize);
1697 space = fs->fs_csp;
1698 for (i = 0; i < blks; i += fs->fs_frag) {
1699 size = fs->fs_bsize;
1700 if (i + fs->fs_frag > blks)
1701 size = (blks - i) * fs->fs_fsize;
1702 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1703 size, 0, 0, 0);
1704 bcopy(space, bp->b_data, (u_int)size);
1705 space = (char *)space + size;
1706 if (suspended)
1707 bp->b_flags |= B_VALIDSUSPWRT;
1708 if (waitfor != MNT_WAIT)
1709 bawrite(bp);
1710 else if ((error = bwrite(bp)) != 0)
1711 allerror = error;
1712 }
1713 /*
1714 * Now write back the superblock itself. If any errors occurred
1715 * up to this point, then fail so that the superblock avoids
1716 * being written out as clean.
1717 */
1718 if (allerror) {
1719 brelse(sbbp);
1720 return (allerror);
1721 }
1722 bp = sbbp;
1723 if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1724 (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1725 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1726 fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1727 fs->fs_sblockloc = SBLOCK_UFS1;
1728 }
1729 if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1730 (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1731 printf("%s: correcting fs_sblockloc from %jd to %d\n",
1732 fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1733 fs->fs_sblockloc = SBLOCK_UFS2;
1734 }
1735 fs->fs_fmod = 0;
1736 fs->fs_time = time_second;
1737 if (fs->fs_flags & FS_DOSOFTDEP)
1738 softdep_setup_sbupdate(mp, (struct fs *)bp->b_data, bp);
1739 bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1740 ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1741 if (suspended)
1742 bp->b_flags |= B_VALIDSUSPWRT;
1743 if (waitfor != MNT_WAIT)
1744 bawrite(bp);
1745 else if ((error = bwrite(bp)) != 0)
1746 allerror = error;
1747 return (allerror);
1748}
1749
/*
 * Dispatch extended-attribute control requests: to the UFS extattr
 * implementation when the kernel is built with UFS_EXTATTR, otherwise
 * to the generic VFS fallback.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
    int attrnamespace, const char *attrname)
{

#ifdef UFS_EXTATTR
	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname));
#else
	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname));
#endif
}
1763
/*
 * Release an in-core inode and its on-disk inode data back to their UMA
 * zones.  NOTE(review): the branch structure assumes i_din1/i_din2 share
 * storage (a union in struct inode), so at most one dinode free fires --
 * confirm against the struct inode definition.
 */
static void
ffs_ifree(struct ufsmount *ump, struct inode *ip)
{

	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
		uma_zfree(uma_ufs1, ip->i_din1);
	else if (ip->i_din2 != NULL)
		uma_zfree(uma_ufs2, ip->i_din2);
	uma_zfree(uma_inode, ip);
}
1774
/*
 * Tunable enabling ffs_bufwrite()'s copy-and-write-in-background
 * optimization for buffers marked BX_BKGRDWRITE.
 */
static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
    "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1778
/*
 * Complete a background write started from bwrite.
 *
 * Runs as the b_iodone callback of the anonymous copy buffer created by
 * ffs_bufwrite(): finishes the copy's I/O, hands any unfinished
 * soft-updates dependencies back to the original buffer, and wakes any
 * thread waiting in ffs_bufwrite() for the background write to drain.
 */
static void
ffs_backgroundwritedone(struct buf *bp)
{
	struct bufobj *bufobj;
	struct buf *origbp;

	/*
	 * Find the original buffer that we are writing.
	 */
	bufobj = bp->b_bufobj;
	BO_LOCK(bufobj);
	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
		panic("backgroundwritedone: lost buffer");
	/* Grab an extra reference to be dropped by the bufdone() below. */
	bufobj_wrefl(bufobj);
	BO_UNLOCK(bufobj);
	/*
	 * Process dependencies then return any unfinished ones.
	 */
	if (!LIST_EMPTY(&bp->b_dep))
		buf_complete(bp);
#ifdef SOFTUPDATES
	if (!LIST_EMPTY(&bp->b_dep))
		softdep_move_dependencies(bp, origbp);
#endif
	/*
	 * This buffer is marked B_NOCACHE so when it is released
	 * by biodone it will be tossed.
	 */
	bp->b_flags |= B_NOCACHE;
	bp->b_flags &= ~B_CACHE;
	bufdone(bp);
	BO_LOCK(bufobj);
	/*
	 * Clear the BV_BKGRDINPROG flag in the original buffer
	 * and awaken it if it is waiting for the write to complete.
	 * If BV_BKGRDINPROG is not set in the original buffer it must
	 * have been released and re-instantiated - which is not legal.
	 */
	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
	    ("backgroundwritedone: lost buffer2"));
	origbp->b_vflags &= ~BV_BKGRDINPROG;
	if (origbp->b_vflags & BV_BKGRDWAIT) {
		origbp->b_vflags &= ~BV_BKGRDWAIT;
		/* ffs_bufwrite() sleeps on &origbp->b_xflags. */
		wakeup(&origbp->b_xflags);
	}
	BO_UNLOCK(bufobj);
}
1830
1831
/*
 * Write, release buffer on completion. (Done by iodone
 * if async). Do not bother writing anything if the buffer
 * is invalid.
 *
 * Note that we set B_CACHE here, indicating that buffer is
 * fully valid and thus cacheable. This is true even of NFS
 * now so we set it generally. This could be set either here
 * or in biodone() since the I/O is synchronous. We put it
 * here.
 */
static int
ffs_bufwrite(struct buf *bp)
{
	int oldflags, s;
	struct buf *newbp;

	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	}

	oldflags = bp->b_flags;

	if (!BUF_ISLOCKED(bp))
		panic("bufwrite: buffer is not busy???");
	/*
	 * NOTE(review): `s' is only splx()'d on the early-return path;
	 * presumably harmless since spl* calls are vestigial no-ops in
	 * this kernel -- confirm.
	 */
	s = splbio();
	/*
	 * If a background write is already in progress, delay
	 * writing this block if it is asynchronous. Otherwise
	 * wait for the background write to complete.
	 */
	BO_LOCK(bp->b_bufobj);
	if (bp->b_vflags & BV_BKGRDINPROG) {
		if (bp->b_flags & B_ASYNC) {
			BO_UNLOCK(bp->b_bufobj);
			splx(s);
			bdwrite(bp);
			return (0);
		}
		bp->b_vflags |= BV_BKGRDWAIT;
		/* ffs_backgroundwritedone() wakes us via &bp->b_xflags. */
		msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
		if (bp->b_vflags & BV_BKGRDINPROG)
			panic("bufwrite: still writing");
	}
	BO_UNLOCK(bp->b_bufobj);

	/*
	 * If this buffer is marked for background writing and we
	 * do not have to wait for it, make a copy and write the
	 * copy so as to leave this buffer ready for further use.
	 *
	 * This optimization eats a lot of memory. If we have a page
	 * or buffer shortfall we can't do it.
	 */
	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
	    (bp->b_flags & B_ASYNC) &&
	    !vm_page_count_severe() &&
	    !buf_dirty_count_severe()) {
		KASSERT(bp->b_iodone == NULL,
		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));

		/* get a new block */
		newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
		if (newbp == NULL)
			goto normal_write;

		/*
		 * set it to be identical to the old block. We have to
		 * set b_lblkno and BKGRDMARKER before calling bgetvp()
		 * to avoid confusing the splay tree and gbincore().
		 */
		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
		newbp->b_lblkno = bp->b_lblkno;
		newbp->b_xflags |= BX_BKGRDMARKER;
		BO_LOCK(bp->b_bufobj);
		bp->b_vflags |= BV_BKGRDINPROG;
		bgetvp(bp->b_vp, newbp);
		BO_UNLOCK(bp->b_bufobj);
		newbp->b_bufobj = &bp->b_vp->v_bufobj;
		newbp->b_blkno = bp->b_blkno;
		newbp->b_offset = bp->b_offset;
		/* Completion handler reunites dependencies with `bp'. */
		newbp->b_iodone = ffs_backgroundwritedone;
		newbp->b_flags |= B_ASYNC;
		newbp->b_flags &= ~B_INVAL;

#ifdef SOFTUPDATES
		/*
		 * Move over the dependencies. If there are rollbacks,
		 * leave the parent buffer dirtied as it will need to
		 * be written again.
		 */
		if (LIST_EMPTY(&bp->b_dep) ||
		    softdep_move_dependencies(bp, newbp) == 0)
			bundirty(bp);
#else
		bundirty(bp);
#endif

		/*
		 * Initiate write on the copy, release the original to
		 * the B_LOCKED queue so that it cannot go away until
		 * the background write completes. If not locked it could go
		 * away and then be reconstituted while it was being written.
		 * If the reconstituted buffer were written, we could end up
		 * with two background copies being written at the same time.
		 */
		bqrelse(bp);
		bp = newbp;
	} else
		/* Mark the buffer clean */
		bundirty(bp);


	/* Let the normal bufwrite do the rest for us */
normal_write:
	return (bufwrite(bp));
}
1951
1952
/*
 * bufobj strategy routine for FFS device vnodes backed by GEOM.
 *
 * For writes: panics on writes that reach a suspended filesystem without
 * B_VALIDSUSPWRT, runs snapshot copy-on-write processing when the device
 * is under snapshot (VV_COPYONWRITE and si_snapdata set), and starts any
 * soft-updates dependency processing, before passing the buffer down to
 * g_vfs_strategy().  Cluster buffers are handled per component buffer.
 */
static void
ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
{
	struct vnode *vp;
	int error;
	struct buf *tbp;
	int nocopy;

	vp = bo->__bo_vnode;
	if (bp->b_iocmd == BIO_WRITE) {
		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
			panic("ffs_geom_strategy: bad I/O");
		nocopy = bp->b_flags & B_NOCOPY;
		bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
		if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
		    vp->v_rdev->si_snapdata != NULL) {
			if ((bp->b_flags & B_CLUSTER) != 0) {
				runningbufwakeup(bp);
				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
					      b_cluster.cluster_entry) {
					error = ffs_copyonwrite(vp, tbp);
					/* EOPNOTSUPP is best-effort, not fatal. */
					if (error != 0 &&
					    error != EOPNOTSUPP) {
						bp->b_error = error;
						bp->b_ioflags |= BIO_ERROR;
						bufdone(bp);
						return;
					}
				}
				/* Re-account the space released above. */
				bp->b_runningbufspace = bp->b_bufsize;
				atomic_add_long(&runningbufspace,
					       bp->b_runningbufspace);
			} else {
				error = ffs_copyonwrite(vp, bp);
				if (error != 0 && error != EOPNOTSUPP) {
					bp->b_error = error;
					bp->b_ioflags |= BIO_ERROR;
					bufdone(bp);
					return;
				}
			}
		}
#ifdef SOFTUPDATES
		if ((bp->b_flags & B_CLUSTER) != 0) {
			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
				      b_cluster.cluster_entry) {
				if (!LIST_EMPTY(&tbp->b_dep))
					buf_start(tbp);
			}
		} else {
			if (!LIST_EMPTY(&bp->b_dep))
				buf_start(bp);
		}

#endif
	}
	g_vfs_strategy(bo, bp);
}
2013
2014#ifdef DDB
2015
/*
 * Print a one-line summary of a single UFS mount for the DDB `show ffs'
 * command: mount/devvp/fs pointers plus soft-updates worklist counters.
 */
static void
db_print_ffs(struct ufsmount *ump)
{
	db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d "
		  "su_req %d\n",
	    ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
	    ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
	    ump->softdep_on_worklist_inprogress, ump->softdep_deps,
	    ump->softdep_req);
}
2026
2027DB_SHOW_COMMAND(ffs, db_show_ffs)
2028{
2029 struct mount *mp;
2030 struct ufsmount *ump;
2031
2032 if (have_addr) {
2033 ump = VFSTOUFS((struct mount *)addr);
2034 db_print_ffs(ump);
2035 return;
2036 }
2037
2038 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2039 if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
2040 db_print_ffs(VFSTOUFS(mp));
2041 }
2042}
2043
2044#endif /* DDB */