/* tmpfs_vfsops.c revision 182739 */
1/* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */ 2 3/*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33/* 34 * Efficient memory file system. 35 * 36 * tmpfs is a file system that uses NetBSD's virtual memory sub-system 37 * (the well-known UVM) to store file data and metadata in an efficient 38 * way. This means that it does not follow the structure of an on-disk 39 * file system because it simply does not need to. 
Instead, it uses 40 * memory-specific data structures and algorithms to automatically 41 * allocate and release resources. 42 */ 43#include <sys/cdefs.h> 44__FBSDID("$FreeBSD: head/sys/fs/tmpfs/tmpfs_vfsops.c 182739 2008-09-03 18:53:48Z delphij $"); 45 46#include <sys/param.h> 47#include <sys/limits.h> 48#include <sys/lock.h> 49#include <sys/mutex.h> 50#include <sys/kernel.h> 51#include <sys/stat.h> 52#include <sys/systm.h> 53#include <sys/sysctl.h> 54 55#include <vm/vm.h> 56#include <vm/vm_object.h> 57#include <vm/vm_param.h> 58 59#include <fs/tmpfs/tmpfs.h> 60 61/* 62 * Default permission for root node 63 */ 64#define TMPFS_DEFAULT_ROOT_MODE (S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 65 66MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures"); 67MALLOC_DEFINE(M_TMPFSNAME, "tmpfs name", "tmpfs file names"); 68 69/* --------------------------------------------------------------------- */ 70 71static int tmpfs_mount(struct mount *, struct thread *); 72static int tmpfs_unmount(struct mount *, int, struct thread *); 73static int tmpfs_root(struct mount *, int flags, struct vnode **, 74 struct thread *); 75static int tmpfs_fhtovp(struct mount *, struct fid *, struct vnode **); 76static int tmpfs_statfs(struct mount *, struct statfs *, struct thread *); 77 78/* --------------------------------------------------------------------- */ 79 80static const char *tmpfs_opts[] = { 81 "from", "size", "inodes", "uid", "gid", "mode", "export", 82 NULL 83}; 84 85/* --------------------------------------------------------------------- */ 86 87#define SWI_MAXMIB 3 88 89static u_int 90get_swpgtotal(void) 91{ 92 struct xswdev xsd; 93 char *sname = "vm.swap_info"; 94 int soid[SWI_MAXMIB], oid[2]; 95 u_int unswdev, total, dmmax, nswapdev; 96 size_t mibi, len; 97 98 total = 0; 99 100 len = sizeof(dmmax); 101 if (kernel_sysctlbyname(curthread, "vm.dmmax", &dmmax, &len, 102 NULL, 0, NULL, 0) != 0) 103 return total; 104 105 len = sizeof(nswapdev); 106 if 
(kernel_sysctlbyname(curthread, "vm.nswapdev", 107 &nswapdev, &len, 108 NULL, 0, NULL, 0) != 0) 109 return total; 110 111 mibi = (SWI_MAXMIB - 1) * sizeof(int); 112 oid[0] = 0; 113 oid[1] = 3; 114 115 if (kernel_sysctl(curthread, oid, 2, 116 soid, &mibi, (void *)sname, strlen(sname), 117 NULL, 0) != 0) 118 return total; 119 120 mibi = (SWI_MAXMIB - 1); 121 for (unswdev = 0; unswdev < nswapdev; ++unswdev) { 122 soid[mibi] = unswdev; 123 len = sizeof(struct xswdev); 124 if (kernel_sysctl(curthread, 125 soid, mibi + 1, &xsd, &len, NULL, 0, 126 NULL, 0) != 0) 127 return total; 128 if (len == sizeof(struct xswdev)) 129 total += (xsd.xsw_nblks - dmmax); 130 } 131 132 /* Not Reached */ 133 return total; 134} 135 136/* --------------------------------------------------------------------- */ 137static int 138tmpfs_node_ctor(void *mem, int size, void *arg, int flags) 139{ 140 struct tmpfs_node *node = (struct tmpfs_node *)mem; 141 142 node->tn_gen++; 143 node->tn_size = 0; 144 node->tn_status = 0; 145 node->tn_flags = 0; 146 node->tn_links = 0; 147 node->tn_vnode = NULL; 148 node->tn_vpstate = 0; 149 150 return (0); 151} 152 153static void 154tmpfs_node_dtor(void *mem, int size, void *arg) 155{ 156 struct tmpfs_node *node = (struct tmpfs_node *)mem; 157 node->tn_type = VNON; 158} 159 160static int 161tmpfs_node_init(void *mem, int size, int flags) 162{ 163 struct tmpfs_node *node = (struct tmpfs_node *)mem; 164 node->tn_id = 0; 165 166 mtx_init(&node->tn_interlock, "tmpfs node interlock", NULL, MTX_DEF); 167 node->tn_gen = arc4random(); 168 169 return (0); 170} 171 172static void 173tmpfs_node_fini(void *mem, int size) 174{ 175 struct tmpfs_node *node = (struct tmpfs_node *)mem; 176 177 mtx_destroy(&node->tn_interlock); 178} 179 180static int 181tmpfs_mount(struct mount *mp, struct thread *td) 182{ 183 struct tmpfs_mount *tmp; 184 struct tmpfs_node *root; 185 size_t pages, mem_size; 186 ino_t nodes; 187 int error; 188 /* Size counters. 
*/ 189 ino_t nodes_max; 190 size_t size_max; 191 192 /* Root node attributes. */ 193 uid_t root_uid; 194 gid_t root_gid; 195 mode_t root_mode; 196 197 struct vattr va; 198 199 if (vfs_filteropt(mp->mnt_optnew, tmpfs_opts)) 200 return (EINVAL); 201 202 if (mp->mnt_flag & MNT_UPDATE) { 203 /* XXX: There is no support yet to update file system 204 * settings. Should be added. */ 205 206 return EOPNOTSUPP; 207 } 208 209 printf("WARNING: TMPFS is considered to be a highly experimental " 210 "feature in FreeBSD.\n"); 211 212 vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY); 213 error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred); 214 VOP_UNLOCK(mp->mnt_vnodecovered, 0); 215 if (error) 216 return (error); 217 218 if (mp->mnt_cred->cr_ruid != 0 || 219 vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1) 220 root_gid = va.va_gid; 221 if (mp->mnt_cred->cr_ruid != 0 || 222 vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1) 223 root_uid = va.va_uid; 224 if (mp->mnt_cred->cr_ruid != 0 || 225 vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1) 226 root_mode = va.va_mode; 227 if (vfs_scanopt(mp->mnt_optnew, "inodes", "%d", &nodes_max) != 1) 228 nodes_max = 0; 229 if (vfs_scanopt(mp->mnt_optnew, "size", "%qu", &size_max) != 1) 230 size_max = 0; 231 232 /* Do not allow mounts if we do not have enough memory to preserve 233 * the minimum reserved pages. */ 234 mem_size = cnt.v_free_count + cnt.v_inactive_count + get_swpgtotal(); 235 mem_size -= mem_size > cnt.v_wire_count ? cnt.v_wire_count : mem_size; 236 if (mem_size < TMPFS_PAGES_RESERVED) 237 return ENOSPC; 238 239 /* Get the maximum number of memory pages this file system is 240 * allowed to use, based on the maximum size the user passed in 241 * the mount structure. A value of zero is treated as if the 242 * maximum available space was requested. 
*/ 243 if (size_max < PAGE_SIZE || size_max >= SIZE_MAX) 244 pages = SIZE_MAX; 245 else 246 pages = howmany(size_max, PAGE_SIZE); 247 MPASS(pages > 0); 248 249 if (nodes_max <= 3) 250 nodes = 3 + pages * PAGE_SIZE / 1024; 251 else 252 nodes = nodes_max; 253 MPASS(nodes >= 3); 254 255 /* Allocate the tmpfs mount structure and fill it. */ 256 tmp = (struct tmpfs_mount *)malloc(sizeof(struct tmpfs_mount), 257 M_TMPFSMNT, M_WAITOK | M_ZERO); 258 259 mtx_init(&tmp->allnode_lock, "tmpfs allnode lock", NULL, MTX_DEF); 260 tmp->tm_nodes_max = nodes; 261 tmp->tm_nodes_inuse = 0; 262 tmp->tm_maxfilesize = (u_int64_t)(cnt.v_page_count + get_swpgtotal()) * PAGE_SIZE; 263 LIST_INIT(&tmp->tm_nodes_used); 264 265 tmp->tm_pages_max = pages; 266 tmp->tm_pages_used = 0; 267 tmp->tm_ino_unr = new_unrhdr(2, INT_MAX, &tmp->allnode_lock); 268 tmp->tm_dirent_pool = uma_zcreate("TMPFS dirent", 269 sizeof(struct tmpfs_dirent), 270 NULL, NULL, NULL, NULL, 271 UMA_ALIGN_PTR, 0); 272 tmp->tm_node_pool = uma_zcreate("TMPFS node", 273 sizeof(struct tmpfs_node), 274 tmpfs_node_ctor, tmpfs_node_dtor, 275 tmpfs_node_init, tmpfs_node_fini, 276 UMA_ALIGN_PTR, 0); 277 278 /* Allocate the root node. 
*/ 279 error = tmpfs_alloc_node(tmp, VDIR, root_uid, 280 root_gid, root_mode & ALLPERMS, NULL, NULL, 281 VNOVAL, td, &root); 282 283 if (error != 0 || root == NULL) { 284 uma_zdestroy(tmp->tm_node_pool); 285 uma_zdestroy(tmp->tm_dirent_pool); 286 delete_unrhdr(tmp->tm_ino_unr); 287 free(tmp, M_TMPFSMNT); 288 return error; 289 } 290 KASSERT(root->tn_id == 2, ("tmpfs root with invalid ino: %d", root->tn_id)); 291 tmp->tm_root = root; 292 293 MNT_ILOCK(mp); 294 mp->mnt_flag |= MNT_LOCAL; 295 mp->mnt_kern_flag |= MNTK_MPSAFE; 296 MNT_IUNLOCK(mp); 297 298 mp->mnt_data = tmp; 299 mp->mnt_stat.f_namemax = MAXNAMLEN; 300 vfs_getnewfsid(mp); 301 vfs_mountedfrom(mp, "tmpfs"); 302 303 return 0; 304} 305 306/* --------------------------------------------------------------------- */ 307 308/* ARGSUSED2 */ 309static int 310tmpfs_unmount(struct mount *mp, int mntflags, struct thread *l) 311{ 312 int error; 313 int flags = 0; 314 struct tmpfs_mount *tmp; 315 struct tmpfs_node *node; 316 317 /* Handle forced unmounts. */ 318 if (mntflags & MNT_FORCE) 319 flags |= FORCECLOSE; 320 321 /* Finalize all pending I/O. */ 322 error = vflush(mp, 0, flags, l); 323 if (error != 0) 324 return error; 325 326 tmp = VFS_TO_TMPFS(mp); 327 328 /* Free all associated data. The loop iterates over the linked list 329 * we have containing all used nodes. For each of them that is 330 * a directory, we free all its directory entries. Note that after 331 * freeing a node, it will automatically go to the available list, 332 * so we will later have to iterate over it to release its items. 
*/ 333 node = LIST_FIRST(&tmp->tm_nodes_used); 334 while (node != NULL) { 335 struct tmpfs_node *next; 336 337 if (node->tn_type == VDIR) { 338 struct tmpfs_dirent *de; 339 340 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead); 341 while (de != NULL) { 342 struct tmpfs_dirent *nde; 343 344 nde = TAILQ_NEXT(de, td_entries); 345 tmpfs_free_dirent(tmp, de, FALSE); 346 de = nde; 347 node->tn_size -= sizeof(struct tmpfs_dirent); 348 } 349 } 350 351 next = LIST_NEXT(node, tn_entries); 352 tmpfs_free_node(tmp, node); 353 node = next; 354 } 355 356 uma_zdestroy(tmp->tm_dirent_pool); 357 uma_zdestroy(tmp->tm_node_pool); 358 delete_unrhdr(tmp->tm_ino_unr); 359 360 mtx_destroy(&tmp->allnode_lock); 361 MPASS(tmp->tm_pages_used == 0); 362 MPASS(tmp->tm_nodes_inuse == 0); 363 364 /* Throw away the tmpfs_mount structure. */ 365 free(mp->mnt_data, M_TMPFSMNT); 366 mp->mnt_data = NULL; 367 368 MNT_ILOCK(mp); 369 mp->mnt_flag &= ~MNT_LOCAL; 370 MNT_IUNLOCK(mp); 371 return 0; 372} 373 374/* --------------------------------------------------------------------- */ 375 376static int 377tmpfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td) 378{ 379 int error; 380 error = tmpfs_alloc_vp(mp, VFS_TO_TMPFS(mp)->tm_root, flags, vpp, td); 381 382 if (!error) 383 (*vpp)->v_vflag |= VV_ROOT; 384 385 return error; 386} 387 388/* --------------------------------------------------------------------- */ 389 390static int 391tmpfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) 392{ 393 boolean_t found; 394 struct tmpfs_fid *tfhp; 395 struct tmpfs_mount *tmp; 396 struct tmpfs_node *node; 397 398 tmp = VFS_TO_TMPFS(mp); 399 400 tfhp = (struct tmpfs_fid *)fhp; 401 if (tfhp->tf_len != sizeof(struct tmpfs_fid)) 402 return EINVAL; 403 404 if (tfhp->tf_id >= tmp->tm_nodes_max) 405 return EINVAL; 406 407 found = FALSE; 408 409 TMPFS_LOCK(tmp); 410 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 411 if (node->tn_id == tfhp->tf_id && 412 node->tn_gen == tfhp->tf_gen) { 
413 found = TRUE; 414 break; 415 } 416 } 417 TMPFS_UNLOCK(tmp); 418 419 if (found) 420 return (tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp, curthread)); 421 422 return (EINVAL); 423} 424 425/* --------------------------------------------------------------------- */ 426 427/* ARGSUSED2 */ 428static int 429tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *l) 430{ 431 fsfilcnt_t freenodes; 432 struct tmpfs_mount *tmp; 433 434 tmp = VFS_TO_TMPFS(mp); 435 436 sbp->f_iosize = PAGE_SIZE; 437 sbp->f_bsize = PAGE_SIZE; 438 439 sbp->f_blocks = TMPFS_PAGES_MAX(tmp); 440 sbp->f_bavail = sbp->f_bfree = TMPFS_PAGES_AVAIL(tmp); 441 442 freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_inuse, 443 TMPFS_PAGES_AVAIL(tmp) * PAGE_SIZE / sizeof(struct tmpfs_node)); 444 445 sbp->f_files = freenodes + tmp->tm_nodes_inuse; 446 sbp->f_ffree = freenodes; 447 /* sbp->f_owner = tmp->tn_uid; */ 448 449 return 0; 450} 451 452/* --------------------------------------------------------------------- */ 453 454/* 455 * tmpfs vfs operations. 456 */ 457 458struct vfsops tmpfs_vfsops = { 459 .vfs_mount = tmpfs_mount, 460 .vfs_unmount = tmpfs_unmount, 461 .vfs_root = tmpfs_root, 462 .vfs_statfs = tmpfs_statfs, 463 .vfs_fhtovp = tmpfs_fhtovp, 464}; 465VFS_SET(tmpfs_vfsops, tmpfs, 0); 466