Deleted Added
full compact
vfs_subr.c (250505) vfs_subr.c (250551)
1/*-
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.

--- 25 unchanged lines hidden (view full) ---

34 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
35 */
36
37/*
38 * External virtual filesystem routines
39 */
40
41#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.

--- 25 unchanged lines hidden (view full) ---

34 * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
35 */
36
37/*
38 * External virtual filesystem routines
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD: head/sys/kern/vfs_subr.c 250505 2013-05-11 11:17:44Z kib $");
42__FBSDID("$FreeBSD: head/sys/kern/vfs_subr.c 250551 2013-05-12 04:05:01Z jeff $");
43
44#include "opt_compat.h"
45#include "opt_ddb.h"
46#include "opt_watchdog.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/bio.h>

--- 9 unchanged lines hidden (view full) ---

60#include <sys/jail.h>
61#include <sys/kdb.h>
62#include <sys/kernel.h>
63#include <sys/kthread.h>
64#include <sys/lockf.h>
65#include <sys/malloc.h>
66#include <sys/mount.h>
67#include <sys/namei.h>
43
44#include "opt_compat.h"
45#include "opt_ddb.h"
46#include "opt_watchdog.h"
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/bio.h>

--- 9 unchanged lines hidden (view full) ---

60#include <sys/jail.h>
61#include <sys/kdb.h>
62#include <sys/kernel.h>
63#include <sys/kthread.h>
64#include <sys/lockf.h>
65#include <sys/malloc.h>
66#include <sys/mount.h>
67#include <sys/namei.h>
68#include <sys/pctrie.h>
68#include <sys/priv.h>
69#include <sys/reboot.h>
70#include <sys/rwlock.h>
71#include <sys/sched.h>
72#include <sys/sleepqueue.h>
73#include <sys/smp.h>
74#include <sys/stat.h>
75#include <sys/sysctl.h>

--- 103 unchanged lines hidden (view full) ---

179 * numvnodes
180 * freevnodes
181 */
182static struct mtx vnode_free_list_mtx;
183
184/* Publicly exported FS */
185struct nfs_public nfs_pub;
186
69#include <sys/priv.h>
70#include <sys/reboot.h>
71#include <sys/rwlock.h>
72#include <sys/sched.h>
73#include <sys/sleepqueue.h>
74#include <sys/smp.h>
75#include <sys/stat.h>
76#include <sys/sysctl.h>

--- 103 unchanged lines hidden (view full) ---

180 * numvnodes
181 * freevnodes
182 */
183static struct mtx vnode_free_list_mtx;
184
185/* Publicly exported FS */
186struct nfs_public nfs_pub;
187
188static uma_zone_t buf_trie_zone;
189
187/* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
188static uma_zone_t vnode_zone;
189static uma_zone_t vnodepoll_zone;
190
191/*
192 * The workitem queue.
193 *
194 * It is useful to delay writes of file data and filesystem metadata

--- 84 unchanged lines hidden (view full) ---

279#define VCANRECYCLE(vp) (((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt)
280#define VSHOULDFREE(vp) (!((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt)
281#define VSHOULDBUSY(vp) (((vp)->v_iflag & VI_FREE) && (vp)->v_holdcnt)
282
283/* Shift count for (uintptr_t)vp to initialize vp->v_hash. */
284static int vnsz2log;
285
286/*
190/* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
191static uma_zone_t vnode_zone;
192static uma_zone_t vnodepoll_zone;
193
194/*
195 * The workitem queue.
196 *
197 * It is useful to delay writes of file data and filesystem metadata

--- 84 unchanged lines hidden (view full) ---

282#define VCANRECYCLE(vp) (((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt)
283#define VSHOULDFREE(vp) (!((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt)
284#define VSHOULDBUSY(vp) (((vp)->v_iflag & VI_FREE) && (vp)->v_holdcnt)
285
286/* Shift count for (uintptr_t)vp to initialize vp->v_hash. */
287static int vnsz2log;
288
289/*
290 * Support for the bufobj clean & dirty pctrie.
291 */
/*
 * Node allocation callback passed to PCTRIE_DEFINE() below: takes one
 * item from buf_trie_zone.  ptree is not referenced.  The request uses
 * M_NOWAIT, so (per uma(9)) it does not sleep and may return NULL.
 */
292static void *
293buf_trie_alloc(struct pctrie *ptree)
294{
295
296 return uma_zalloc(buf_trie_zone, M_NOWAIT);
297}
298
/*
 * Node release callback passed to PCTRIE_DEFINE() below: hands node back
 * to buf_trie_zone.  ptree is not referenced.
 */
299static void
300buf_trie_free(struct pctrie *ptree, void *node)
301{
302
303 uma_zfree(buf_trie_zone, node);
304}
/*
 * Instantiate the trie for struct buf keyed on b_lblkno, using the two
 * callbacks above for node storage.
 * NOTE(review): presumably this is what provides the BUF_PCTRIE_INSERT,
 * BUF_PCTRIE_LOOKUP, BUF_PCTRIE_LOOKUP_LE and BUF_PCTRIE_REMOVE names
 * used later in this file -- confirm against <sys/pctrie.h>.
 */
305PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free);
306
307/*
287 * Initialize the vnode management data structures.
288 *
289 * Reevaluate the following cap on the number of vnodes after the physical
290 * memory size exceeds 512GB. In the limit, as the physical memory size
291 * grows, the ratio of physical pages to vnodes approaches sixteen to one.
292 */
293#ifndef MAXVNODES_MAX
294#define MAXVNODES_MAX (512 * (1024 * 1024 * 1024 / (int)PAGE_SIZE / 16))

--- 29 unchanged lines hidden (view full) ---

324 mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF);
325 TAILQ_INIT(&vnode_free_list);
326 mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF);
327 vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
328 NULL, NULL, UMA_ALIGN_PTR, 0);
329 vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
330 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
331 /*
308 * Initialize the vnode management data structures.
309 *
310 * Reevaluate the following cap on the number of vnodes after the physical
311 * memory size exceeds 512GB. In the limit, as the physical memory size
312 * grows, the ratio of physical pages to vnodes approaches sixteen to one.
313 */
314#ifndef MAXVNODES_MAX
315#define MAXVNODES_MAX (512 * (1024 * 1024 * 1024 / (int)PAGE_SIZE / 16))

--- 29 unchanged lines hidden (view full) ---

345 mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF);
346 TAILQ_INIT(&vnode_free_list);
347 mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF);
348 vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
349 NULL, NULL, UMA_ALIGN_PTR, 0);
350 vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
351 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
352 /*
353 * Preallocate enough nodes to support one per buf so that
354 * an insert cannot fail. reassignbuf() callers cannot
355 * tolerate an insertion failure.
356 */
357 buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(),
358 NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR,
359 UMA_ZONE_NOFREE | UMA_ZONE_VM);
360 uma_prealloc(buf_trie_zone, nbuf);
361 /*
332 * Initialize the filesystem syncer.
333 */
334 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
335 &syncer_mask);
336 syncer_maxdelay = syncer_mask + 1;
337 mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF);
338 cv_init(&sync_wakeup, "syncer");
339 for (i = 1; i <= sizeof(struct vnode); i <<= 1)

--- 1131 unchanged lines hidden (view full) ---

1471
1472 bufobj_wwait(bo, 0, 0);
1473 BO_UNLOCK(bo);
1474 vnode_pager_setsize(vp, length);
1475
1476 return (0);
1477}
1478
362 * Initialize the filesystem syncer.
363 */
364 syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
365 &syncer_mask);
366 syncer_maxdelay = syncer_mask + 1;
367 mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF);
368 cv_init(&sync_wakeup, "syncer");
369 for (i = 1; i <= sizeof(struct vnode); i <<= 1)

--- 1131 unchanged lines hidden (view full) ---

1501
1502 bufobj_wwait(bo, 0, 0);
1503 BO_UNLOCK(bo);
1504 vnode_pager_setsize(vp, length);
1505
1506 return (0);
1507}
1508
1479/*
1480 * buf_splay() - splay tree core for the clean/dirty list of buffers in
1481 * a vnode.
1482 *
1483 * NOTE: We have to deal with the special case of a background bitmap
1484 * buffer, a situation where two buffers will have the same logical
1485 * block offset. We want (1) only the foreground buffer to be accessed
1486 * in a lookup and (2) must differentiate between the foreground and
1487 * background buffer in the splay tree algorithm because the splay
1488 * tree cannot normally handle multiple entities with the same 'index'.
1489 * We accomplish this by adding differentiating flags to the splay tree's
1490 * numerical domain.
1491 */
/*
 * Top-down splay keyed on b_lblkno.  Returns NULL when root is NULL;
 * otherwise returns the node that becomes the new root: the node whose
 * b_lblkno equals lblkno if the descent reaches one, else the last node
 * visited on the search path.
 * NOTE(review): xflags is accepted but never read in this body, so the
 * ordering here depends on b_lblkno alone.
 */
1492static
1493struct buf *
1494buf_splay(daddr_t lblkno, b_xflags_t xflags, struct buf *root)
1495{
1496 struct buf dummy;
/*
 * dummy is not initialized as a whole: dummy.b_left and dummy.b_right are
 * each stored through righttreemin/lefttreemax (which start out pointing
 * at dummy) before the final assembly reads them.
 */
1497 struct buf *lefttreemax, *righttreemin, *y;
1498
1499 if (root == NULL)
1500 return (NULL);
1501 lefttreemax = righttreemin = &dummy;
1502 for (;;) {
1503 if (lblkno < root->b_lblkno) {
/* No left child: root is the last node on the path, stop here. */
1504 if ((y = root->b_left) == NULL)
1505 break;
/* Key is also below the child: bring the child up before linking. */
1506 if (lblkno < y->b_lblkno) {
1507 /* Rotate right. */
1508 root->b_left = y->b_right;
1509 y->b_right = root;
1510 root = y;
1511 if ((y = root->b_left) == NULL)
1512 break;
1513 }
1514 /* Link into the new root's right tree. */
1515 righttreemin->b_left = root;
1516 righttreemin = root;
1517 } else if (lblkno > root->b_lblkno) {
/* Mirror image of the branch above, descending to the right. */
1518 if ((y = root->b_right) == NULL)
1519 break;
1520 if (lblkno > y->b_lblkno) {
1521 /* Rotate left. */
1522 root->b_right = y->b_left;
1523 y->b_left = root;
1524 root = y;
1525 if ((y = root->b_right) == NULL)
1526 break;
1527 }
1528 /* Link into the new root's left tree. */
1529 lefttreemax->b_right = root;
1530 lefttreemax = root;
1531 } else {
/* Exact b_lblkno match: this node becomes the root. */
1532 break;
1533 }
/* Continue the descent from the child selected above. */
1534 root = y;
1535 }
1536 /* Assemble the new root. */
/*
 * Order matters: root's current subtrees are handed to the accumulated
 * left and right trees first, and only then are root's own links
 * overwritten with the heads of those trees held in dummy.
 */
1537 lefttreemax->b_right = root->b_left;
1538 righttreemin->b_left = root->b_right;
1539 root->b_left = dummy.b_right;
1540 root->b_right = dummy.b_left;
1541 return (root);
1542}
1543
1544static void
1545buf_vlist_remove(struct buf *bp)
1546{
1509static void
1510buf_vlist_remove(struct buf *bp)
1511{
1547 struct buf *root;
1548 struct bufv *bv;
1549
1550 KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
1551 ASSERT_BO_LOCKED(bp->b_bufobj);
1552 KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) !=
1553 (BX_VNDIRTY|BX_VNCLEAN),
1554 ("buf_vlist_remove: Buf %p is on two lists", bp));
1555 if (bp->b_xflags & BX_VNDIRTY)
1556 bv = &bp->b_bufobj->bo_dirty;
1557 else
1558 bv = &bp->b_bufobj->bo_clean;
1512 struct bufv *bv;
1513
1514 KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
1515 ASSERT_BO_LOCKED(bp->b_bufobj);
1516 KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) !=
1517 (BX_VNDIRTY|BX_VNCLEAN),
1518 ("buf_vlist_remove: Buf %p is on two lists", bp));
1519 if (bp->b_xflags & BX_VNDIRTY)
1520 bv = &bp->b_bufobj->bo_dirty;
1521 else
1522 bv = &bp->b_bufobj->bo_clean;
1559 if (bp != bv->bv_root) {
1560 root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root);
1561 KASSERT(root == bp, ("splay lookup failed in remove"));
1562 }
1563 if (bp->b_left == NULL) {
1564 root = bp->b_right;
1565 } else {
1566 root = buf_splay(bp->b_lblkno, bp->b_xflags, bp->b_left);
1567 root->b_right = bp->b_right;
1568 }
1569 bv->bv_root = root;
1523 BUF_PCTRIE_REMOVE(&bv->bv_root, bp->b_lblkno);
1570 TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs);
1571 bv->bv_cnt--;
1572 bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
1573}
1574
1575/*
1524 TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs);
1525 bv->bv_cnt--;
1526 bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
1527}
1528
1529/*
1576 * Add the buffer to the sorted clean or dirty block list using a
1577 * splay tree algorithm.
1530 * Add the buffer to the sorted clean or dirty block list.
1578 *
1579 * NOTE: xflags is passed as a constant, optimizing this inline function!
1580 */
1581static void
1582buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
1583{
1531 *
1532 * NOTE: xflags is passed as a constant, optimizing this inline function!
1533 */
1534static void
1535buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
1536{
1584 struct buf *root;
1585 struct bufv *bv;
1537 struct bufv *bv;
1538 struct buf *n;
1539 int error;
1586
1587 ASSERT_BO_LOCKED(bo);
1588 KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0,
1589 ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags));
1590 bp->b_xflags |= xflags;
1591 if (xflags & BX_VNDIRTY)
1592 bv = &bo->bo_dirty;
1593 else
1594 bv = &bo->bo_clean;
1595
1540
1541 ASSERT_BO_LOCKED(bo);
1542 KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0,
1543 ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags));
1544 bp->b_xflags |= xflags;
1545 if (xflags & BX_VNDIRTY)
1546 bv = &bo->bo_dirty;
1547 else
1548 bv = &bo->bo_clean;
1549
1596 root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root);
1597 if (root == NULL) {
1598 bp->b_left = NULL;
1599 bp->b_right = NULL;
1550 /*
1551 * Keep the list ordered. Optimize empty list insertion. Assume
1552 * we tend to grow at the tail so lookup_le should usually be cheaper
1553 * than _ge.
1554 */
1555 if (bv->bv_cnt == 0 ||
1556 bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno)
1600 TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs);
1557 TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs);
1601 } else if (bp->b_lblkno < root->b_lblkno) {
1602 bp->b_left = root->b_left;
1603 bp->b_right = root;
1604 root->b_left = NULL;
1605 TAILQ_INSERT_BEFORE(root, bp, b_bobufs);
1606 } else {
1607 bp->b_right = root->b_right;
1608 bp->b_left = root;
1609 root->b_right = NULL;
1610 TAILQ_INSERT_AFTER(&bv->bv_hd, root, bp, b_bobufs);
1611 }
1558 else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL)
1559 TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs);
1560 else
1561 TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs);
1562 error = BUF_PCTRIE_INSERT(&bv->bv_root, bp);
1563 if (error)
1564 panic("buf_vlist_add: Preallocated nodes insufficient.");
1612 bv->bv_cnt++;
1565 bv->bv_cnt++;
1613 bv->bv_root = bp;
1614}
1615
1616/*
1617 * Lookup a buffer using the splay tree. Note that we specifically avoid
1618 * shadow buffers used in background bitmap writes.
1619 *
1620 * This code isn't quite as efficient as it could be because we are maintaining
1621 * two sorted lists and do not know which list the block resides in.

--- 4 unchanged lines hidden (view full) ---

1626 * first tree splayed.
1627 */
1628struct buf *
1629gbincore(struct bufobj *bo, daddr_t lblkno)
1630{
1631 struct buf *bp;
1632
1633 ASSERT_BO_LOCKED(bo);
1566}
1567
1568/*
1569 * Lookup a buffer using the splay tree. Note that we specifically avoid
1570 * shadow buffers used in background bitmap writes.
1571 *
1572 * This code isn't quite as efficient as it could be because we are maintaining
1573 * two sorted lists and do not know which list the block resides in.

--- 4 unchanged lines hidden (view full) ---

1578 * first tree splayed.
1579 */
1580struct buf *
1581gbincore(struct bufobj *bo, daddr_t lblkno)
1582{
1583 struct buf *bp;
1584
1585 ASSERT_BO_LOCKED(bo);
1634 if ((bp = bo->bo_clean.bv_root) != NULL && bp->b_lblkno == lblkno)
1586 bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno);
1587 if (bp != NULL)
1635 return (bp);
1588 return (bp);
1636 if ((bp = bo->bo_dirty.bv_root) != NULL && bp->b_lblkno == lblkno)
1637 return (bp);
1638 if ((bp = bo->bo_clean.bv_root) != NULL) {
1639 bo->bo_clean.bv_root = bp = buf_splay(lblkno, 0, bp);
1640 if (bp->b_lblkno == lblkno)
1641 return (bp);
1642 }
1643 if ((bp = bo->bo_dirty.bv_root) != NULL) {
1644 bo->bo_dirty.bv_root = bp = buf_splay(lblkno, 0, bp);
1645 if (bp->b_lblkno == lblkno)
1646 return (bp);
1647 }
1648 return (NULL);
1589 return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno);
1649}
1650
1651/*
1652 * Associate a buffer with a vnode.
1653 */
1654void
1655bgetvp(struct vnode *vp, struct buf *bp)
1656{

--- 798 unchanged lines hidden (view full) ---

2455 VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
2456 ("cleaned vnode still on the free list."));
2457 VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't"));
2458 VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count"));
2459 VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count"));
2460 VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count"));
2461 VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's"));
2462 VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0"));
1590}
1591
1592/*
1593 * Associate a buffer with a vnode.
1594 */
1595void
1596bgetvp(struct vnode *vp, struct buf *bp)
1597{

--- 798 unchanged lines hidden (view full) ---

2396 VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
2397 ("cleaned vnode still on the free list."));
2398 VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't"));
2399 VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count"));
2400 VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count"));
2401 VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count"));
2402 VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's"));
2403 VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0"));
2463 VNASSERT(bo->bo_clean.bv_root == NULL, vp, ("cleanblkroot not NULL"));
2404 VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp,
2405 ("clean blk trie not empty"));
2464 VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0"));
2406 VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0"));
2465 VNASSERT(bo->bo_dirty.bv_root == NULL, vp, ("dirtyblkroot not NULL"));
2407 VNASSERT(pctrie_is_empty(&bo->bo_dirty.bv_root), vp,
2408 ("dirty blk trie not empty"));
2466 VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst"));
2467 VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src"));
2468 VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for .."));
2469 VI_UNLOCK(vp);
2470#ifdef MAC
2471 mac_vnode_destroy(vp);
2472#endif
2473 if (vp->v_pollinfo != NULL)

--- 2363 unchanged lines hidden ---
2409 VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst"));
2410 VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src"));
2411 VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for .."));
2412 VI_UNLOCK(vp);
2413#ifdef MAC
2414 mac_vnode_destroy(vp);
2415#endif
2416 if (vp->v_pollinfo != NULL)

--- 2363 unchanged lines hidden ---