vfs_subr.c (250505) | vfs_subr.c (250551) |
---|---|
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. --- 25 unchanged lines hidden (view full) --- 34 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 35 */ 36 37/* 38 * External virtual filesystem routines 39 */ 40 41#include <sys/cdefs.h> | 1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. --- 25 unchanged lines hidden (view full) --- 34 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 35 */ 36 37/* 38 * External virtual filesystem routines 39 */ 40 41#include <sys/cdefs.h> |
42__FBSDID("$FreeBSD: head/sys/kern/vfs_subr.c 250505 2013-05-11 11:17:44Z kib $"); | 42__FBSDID("$FreeBSD: head/sys/kern/vfs_subr.c 250551 2013-05-12 04:05:01Z jeff $"); |
43 44#include "opt_compat.h" 45#include "opt_ddb.h" 46#include "opt_watchdog.h" 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/bio.h> --- 9 unchanged lines hidden (view full) --- 60#include <sys/jail.h> 61#include <sys/kdb.h> 62#include <sys/kernel.h> 63#include <sys/kthread.h> 64#include <sys/lockf.h> 65#include <sys/malloc.h> 66#include <sys/mount.h> 67#include <sys/namei.h> | 43 44#include "opt_compat.h" 45#include "opt_ddb.h" 46#include "opt_watchdog.h" 47 48#include <sys/param.h> 49#include <sys/systm.h> 50#include <sys/bio.h> --- 9 unchanged lines hidden (view full) --- 60#include <sys/jail.h> 61#include <sys/kdb.h> 62#include <sys/kernel.h> 63#include <sys/kthread.h> 64#include <sys/lockf.h> 65#include <sys/malloc.h> 66#include <sys/mount.h> 67#include <sys/namei.h> |
68#include <sys/pctrie.h> |
|
68#include <sys/priv.h> 69#include <sys/reboot.h> 70#include <sys/rwlock.h> 71#include <sys/sched.h> 72#include <sys/sleepqueue.h> 73#include <sys/smp.h> 74#include <sys/stat.h> 75#include <sys/sysctl.h> --- 103 unchanged lines hidden (view full) --- 179 * numvnodes 180 * freevnodes 181 */ 182static struct mtx vnode_free_list_mtx; 183 184/* Publicly exported FS */ 185struct nfs_public nfs_pub; 186 | 69#include <sys/priv.h> 70#include <sys/reboot.h> 71#include <sys/rwlock.h> 72#include <sys/sched.h> 73#include <sys/sleepqueue.h> 74#include <sys/smp.h> 75#include <sys/stat.h> 76#include <sys/sysctl.h> --- 103 unchanged lines hidden (view full) --- 180 * numvnodes 181 * freevnodes 182 */ 183static struct mtx vnode_free_list_mtx; 184 185/* Publicly exported FS */ 186struct nfs_public nfs_pub; 187 |
188static uma_zone_t buf_trie_zone; 189 |
|
187/* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ 188static uma_zone_t vnode_zone; 189static uma_zone_t vnodepoll_zone; 190 191/* 192 * The workitem queue. 193 * 194 * It is useful to delay writes of file data and filesystem metadata --- 84 unchanged lines hidden (view full) --- 279#define VCANRECYCLE(vp) (((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt) 280#define VSHOULDFREE(vp) (!((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt) 281#define VSHOULDBUSY(vp) (((vp)->v_iflag & VI_FREE) && (vp)->v_holdcnt) 282 283/* Shift count for (uintptr_t)vp to initialize vp->v_hash. */ 284static int vnsz2log; 285 286/* | 190/* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ 191static uma_zone_t vnode_zone; 192static uma_zone_t vnodepoll_zone; 193 194/* 195 * The workitem queue. 196 * 197 * It is useful to delay writes of file data and filesystem metadata --- 84 unchanged lines hidden (view full) --- 282#define VCANRECYCLE(vp) (((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt) 283#define VSHOULDFREE(vp) (!((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt) 284#define VSHOULDBUSY(vp) (((vp)->v_iflag & VI_FREE) && (vp)->v_holdcnt) 285 286/* Shift count for (uintptr_t)vp to initialize vp->v_hash. */ 287static int vnsz2log; 288 289/* |
290 * Support for the bufobj clean & dirty pctrie. 291 */ 292static void * 293buf_trie_alloc(struct pctrie *ptree) 294{ 295 296 return uma_zalloc(buf_trie_zone, M_NOWAIT); 297} 298 299static void 300buf_trie_free(struct pctrie *ptree, void *node) 301{ 302 303 uma_zfree(buf_trie_zone, node); 304} 305PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); 306 307/* |
|
287 * Initialize the vnode management data structures. 288 * 289 * Reevaluate the following cap on the number of vnodes after the physical 290 * memory size exceeds 512GB. In the limit, as the physical memory size 291 * grows, the ratio of physical pages to vnodes approaches sixteen to one. 292 */ 293#ifndef MAXVNODES_MAX 294#define MAXVNODES_MAX (512 * (1024 * 1024 * 1024 / (int)PAGE_SIZE / 16)) --- 29 unchanged lines hidden (view full) --- 324 mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); 325 TAILQ_INIT(&vnode_free_list); 326 mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF); 327 vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, 328 NULL, NULL, UMA_ALIGN_PTR, 0); 329 vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), 330 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 331 /* | 308 * Initialize the vnode management data structures. 309 * 310 * Reevaluate the following cap on the number of vnodes after the physical 311 * memory size exceeds 512GB. In the limit, as the physical memory size 312 * grows, the ratio of physical pages to vnodes approaches sixteen to one. 313 */ 314#ifndef MAXVNODES_MAX 315#define MAXVNODES_MAX (512 * (1024 * 1024 * 1024 / (int)PAGE_SIZE / 16)) --- 29 unchanged lines hidden (view full) --- 345 mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); 346 TAILQ_INIT(&vnode_free_list); 347 mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF); 348 vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, 349 NULL, NULL, UMA_ALIGN_PTR, 0); 350 vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), 351 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 352 /* |
353 * Preallocate enough nodes to support one per buf so that 354 * an insert cannot fail. reassignbuf() callers cannot 355 * tolerate an insertion failure. 356 */ 357 buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(), 358 NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, 359 UMA_ZONE_NOFREE | UMA_ZONE_VM); 360 uma_prealloc(buf_trie_zone, nbuf); 361 /* |
|
332 * Initialize the filesystem syncer. 333 */ 334 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 335 &syncer_mask); 336 syncer_maxdelay = syncer_mask + 1; 337 mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); 338 cv_init(&sync_wakeup, "syncer"); 339 for (i = 1; i <= sizeof(struct vnode); i <<= 1) --- 1131 unchanged lines hidden (view full) --- 1471 1472 bufobj_wwait(bo, 0, 0); 1473 BO_UNLOCK(bo); 1474 vnode_pager_setsize(vp, length); 1475 1476 return (0); 1477} 1478 | 362 * Initialize the filesystem syncer. 363 */ 364 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 365 &syncer_mask); 366 syncer_maxdelay = syncer_mask + 1; 367 mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); 368 cv_init(&sync_wakeup, "syncer"); 369 for (i = 1; i <= sizeof(struct vnode); i <<= 1) --- 1131 unchanged lines hidden (view full) --- 1501 1502 bufobj_wwait(bo, 0, 0); 1503 BO_UNLOCK(bo); 1504 vnode_pager_setsize(vp, length); 1505 1506 return (0); 1507} 1508 |
1479/* 1480 * buf_splay() - splay tree core for the clean/dirty list of buffers in 1481 * a vnode. 1482 * 1483 * NOTE: We have to deal with the special case of a background bitmap 1484 * buffer, a situation where two buffers will have the same logical 1485 * block offset. We want (1) only the foreground buffer to be accessed 1486 * in a lookup and (2) must differentiate between the foreground and 1487 * background buffer in the splay tree algorithm because the splay 1488 * tree cannot normally handle multiple entities with the same 'index'. 1489 * We accomplish this by adding differentiating flags to the splay tree's 1490 * numerical domain. 1491 */ 1492static 1493struct buf * 1494buf_splay(daddr_t lblkno, b_xflags_t xflags, struct buf *root) 1495{ 1496 struct buf dummy; 1497 struct buf *lefttreemax, *righttreemin, *y; 1498 1499 if (root == NULL) 1500 return (NULL); 1501 lefttreemax = righttreemin = &dummy; 1502 for (;;) { 1503 if (lblkno < root->b_lblkno) { 1504 if ((y = root->b_left) == NULL) 1505 break; 1506 if (lblkno < y->b_lblkno) { 1507 /* Rotate right. */ 1508 root->b_left = y->b_right; 1509 y->b_right = root; 1510 root = y; 1511 if ((y = root->b_left) == NULL) 1512 break; 1513 } 1514 /* Link into the new root's right tree. */ 1515 righttreemin->b_left = root; 1516 righttreemin = root; 1517 } else if (lblkno > root->b_lblkno) { 1518 if ((y = root->b_right) == NULL) 1519 break; 1520 if (lblkno > y->b_lblkno) { 1521 /* Rotate left. */ 1522 root->b_right = y->b_left; 1523 y->b_left = root; 1524 root = y; 1525 if ((y = root->b_right) == NULL) 1526 break; 1527 } 1528 /* Link into the new root's left tree. */ 1529 lefttreemax->b_right = root; 1530 lefttreemax = root; 1531 } else { 1532 break; 1533 } 1534 root = y; 1535 } 1536 /* Assemble the new root. */ 1537 lefttreemax->b_right = root->b_left; 1538 righttreemin->b_left = root->b_right; 1539 root->b_left = dummy.b_right; 1540 root->b_right = dummy.b_left; 1541 return (root); 1542} 1543 | |
1544static void 1545buf_vlist_remove(struct buf *bp) 1546{ | 1509static void 1510buf_vlist_remove(struct buf *bp) 1511{ |
1547 struct buf *root; | |
1548 struct bufv *bv; 1549 1550 KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); 1551 ASSERT_BO_LOCKED(bp->b_bufobj); 1552 KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) != 1553 (BX_VNDIRTY|BX_VNCLEAN), 1554 ("buf_vlist_remove: Buf %p is on two lists", bp)); 1555 if (bp->b_xflags & BX_VNDIRTY) 1556 bv = &bp->b_bufobj->bo_dirty; 1557 else 1558 bv = &bp->b_bufobj->bo_clean; | 1512 struct bufv *bv; 1513 1514 KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); 1515 ASSERT_BO_LOCKED(bp->b_bufobj); 1516 KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) != 1517 (BX_VNDIRTY|BX_VNCLEAN), 1518 ("buf_vlist_remove: Buf %p is on two lists", bp)); 1519 if (bp->b_xflags & BX_VNDIRTY) 1520 bv = &bp->b_bufobj->bo_dirty; 1521 else 1522 bv = &bp->b_bufobj->bo_clean; |
1559 if (bp != bv->bv_root) { 1560 root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root); 1561 KASSERT(root == bp, ("splay lookup failed in remove")); 1562 } 1563 if (bp->b_left == NULL) { 1564 root = bp->b_right; 1565 } else { 1566 root = buf_splay(bp->b_lblkno, bp->b_xflags, bp->b_left); 1567 root->b_right = bp->b_right; 1568 } 1569 bv->bv_root = root; | 1523 BUF_PCTRIE_REMOVE(&bv->bv_root, bp->b_lblkno); |
1570 TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs); 1571 bv->bv_cnt--; 1572 bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); 1573} 1574 1575/* | 1524 TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs); 1525 bv->bv_cnt--; 1526 bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); 1527} 1528 1529/* |
1576 * Add the buffer to the sorted clean or dirty block list using a 1577 * splay tree algorithm. | 1530 * Add the buffer to the sorted clean or dirty block list. |
1578 * 1579 * NOTE: xflags is passed as a constant, optimizing this inline function! 1580 */ 1581static void 1582buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags) 1583{ | 1531 * 1532 * NOTE: xflags is passed as a constant, optimizing this inline function! 1533 */ 1534static void 1535buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags) 1536{ |
1584 struct buf *root; | |
1585 struct bufv *bv; | 1537 struct bufv *bv; |
1538 struct buf *n; 1539 int error; |
|
1586 1587 ASSERT_BO_LOCKED(bo); 1588 KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, 1589 ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags)); 1590 bp->b_xflags |= xflags; 1591 if (xflags & BX_VNDIRTY) 1592 bv = &bo->bo_dirty; 1593 else 1594 bv = &bo->bo_clean; 1595 | 1540 1541 ASSERT_BO_LOCKED(bo); 1542 KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, 1543 ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags)); 1544 bp->b_xflags |= xflags; 1545 if (xflags & BX_VNDIRTY) 1546 bv = &bo->bo_dirty; 1547 else 1548 bv = &bo->bo_clean; 1549 |
1596 root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root); 1597 if (root == NULL) { 1598 bp->b_left = NULL; 1599 bp->b_right = NULL; | 1550 /* 1551 * Keep the list ordered. Optimize empty list insertion. Assume 1552 * we tend to grow at the tail so lookup_le should usually be cheaper 1553 * than _ge. 1554 */ 1555 if (bv->bv_cnt == 0 || 1556 bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno) |
1600 TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); | 1557 TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); |
1601 } else if (bp->b_lblkno < root->b_lblkno) { 1602 bp->b_left = root->b_left; 1603 bp->b_right = root; 1604 root->b_left = NULL; 1605 TAILQ_INSERT_BEFORE(root, bp, b_bobufs); 1606 } else { 1607 bp->b_right = root->b_right; 1608 bp->b_left = root; 1609 root->b_right = NULL; 1610 TAILQ_INSERT_AFTER(&bv->bv_hd, root, bp, b_bobufs); 1611 } | 1558 else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL) 1559 TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs); 1560 else 1561 TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs); 1562 error = BUF_PCTRIE_INSERT(&bv->bv_root, bp); 1563 if (error) 1564 panic("buf_vlist_add: Preallocated nodes insufficient."); |
1612 bv->bv_cnt++; | 1565 bv->bv_cnt++; |
1613 bv->bv_root = bp; | |
1614} 1615 1616/* 1617 * Lookup a buffer using the splay tree. Note that we specifically avoid 1618 * shadow buffers used in background bitmap writes. 1619 * 1620 * This code isn't quite as efficient as it could be because we are maintaining 1621 * two sorted lists and do not know which list the block resides in. --- 4 unchanged lines hidden (view full) --- 1626 * first tree splayed. 1627 */ 1628struct buf * 1629gbincore(struct bufobj *bo, daddr_t lblkno) 1630{ 1631 struct buf *bp; 1632 1633 ASSERT_BO_LOCKED(bo); | 1566} 1567 1568/* 1569 * Lookup a buffer using the splay tree. Note that we specifically avoid 1570 * shadow buffers used in background bitmap writes. 1571 * 1572 * This code isn't quite as efficient as it could be because we are maintaining 1573 * two sorted lists and do not know which list the block resides in. --- 4 unchanged lines hidden (view full) --- 1578 * first tree splayed. 1579 */ 1580struct buf * 1581gbincore(struct bufobj *bo, daddr_t lblkno) 1582{ 1583 struct buf *bp; 1584 1585 ASSERT_BO_LOCKED(bo); |
1634 if ((bp = bo->bo_clean.bv_root) != NULL && bp->b_lblkno == lblkno) | 1586 bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno); 1587 if (bp != NULL) |
1635 return (bp); | 1588 return (bp); |
1636 if ((bp = bo->bo_dirty.bv_root) != NULL && bp->b_lblkno == lblkno) 1637 return (bp); 1638 if ((bp = bo->bo_clean.bv_root) != NULL) { 1639 bo->bo_clean.bv_root = bp = buf_splay(lblkno, 0, bp); 1640 if (bp->b_lblkno == lblkno) 1641 return (bp); 1642 } 1643 if ((bp = bo->bo_dirty.bv_root) != NULL) { 1644 bo->bo_dirty.bv_root = bp = buf_splay(lblkno, 0, bp); 1645 if (bp->b_lblkno == lblkno) 1646 return (bp); 1647 } 1648 return (NULL); | 1589 return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno); |
1649} 1650 1651/* 1652 * Associate a buffer with a vnode. 1653 */ 1654void 1655bgetvp(struct vnode *vp, struct buf *bp) 1656{ --- 798 unchanged lines hidden (view full) --- 2455 VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, 2456 ("cleaned vnode still on the free list.")); 2457 VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); 2458 VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); 2459 VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); 2460 VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); 2461 VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); 2462 VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); | 1590} 1591 1592/* 1593 * Associate a buffer with a vnode. 1594 */ 1595void 1596bgetvp(struct vnode *vp, struct buf *bp) 1597{ --- 798 unchanged lines hidden (view full) --- 2396 VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, 2397 ("cleaned vnode still on the free list.")); 2398 VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); 2399 VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); 2400 VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); 2401 VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); 2402 VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); 2403 VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); |
2463 VNASSERT(bo->bo_clean.bv_root == NULL, vp, ("cleanblkroot not NULL")); | 2404 VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp, 2405 ("clean blk trie not empty")); |
2464 VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0")); | 2406 VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0")); |
2465 VNASSERT(bo->bo_dirty.bv_root == NULL, vp, ("dirtyblkroot not NULL")); | 2407 VNASSERT(pctrie_is_empty(&bo->bo_dirty.bv_root), vp, 2408 ("dirty blk trie not empty")); |
2466 VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst")); 2467 VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src")); 2468 VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for ..")); 2469 VI_UNLOCK(vp); 2470#ifdef MAC 2471 mac_vnode_destroy(vp); 2472#endif 2473 if (vp->v_pollinfo != NULL) --- 2363 unchanged lines hidden --- | 2409 VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst")); 2410 VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src")); 2411 VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for ..")); 2412 VI_UNLOCK(vp); 2413#ifdef MAC 2414 mac_vnode_destroy(vp); 2415#endif 2416 if (vp->v_pollinfo != NULL) --- 2363 unchanged lines hidden --- |