Deleted: vfs_bio.c (25649)    Added: vfs_bio.c (25930)
1/*
2 * Copyright (c) 1994 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright

--- 4 unchanged lines hidden ---

13 * documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 * John S. Dyson.
16 * 4. This work was done expressly for inclusion into FreeBSD. Other use
17 * is allowed if this notation is included.
18 * 5. Modifications may be freely made to this file if the above conditions
19 * are met.
20 *
1/*
2 * Copyright (c) 1994 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright

--- 4 unchanged lines hidden ---

13 * documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 * John S. Dyson.
16 * 4. This work was done expressly for inclusion into FreeBSD. Other use
17 * is allowed if this notation is included.
18 * 5. Modifications may be freely made to this file if the above conditions
19 * are met.
20 *
21 * $Id: vfs_bio.c,v 1.114 1997/04/13 03:33:25 dyson Exp $
21 * $Id: vfs_bio.c,v 1.115 1997/05/10 09:09:42 joerg Exp $
22 */
23
24/*
25 * this file contains a new buffer I/O scheme implementing a coherent
26 * VM object and buffer cache scheme. Pains have been taken to make
27 * sure that the performance degradation associated with schemes such
28 * as this is not realized.
29 *

--- 42 unchanged lines hidden ---

72struct buf *buf; /* buffer header pool */
73struct swqueue bswlist;
74
75int count_lock_queue __P((void));
76static void vm_hold_free_pages(struct buf * bp, vm_offset_t from,
77 vm_offset_t to);
78static void vm_hold_load_pages(struct buf * bp, vm_offset_t from,
79 vm_offset_t to);
22 */
23
24/*
25 * this file contains a new buffer I/O scheme implementing a coherent
26 * VM object and buffer cache scheme. Pains have been taken to make
27 * sure that the performance degradation associated with schemes such
28 * as this is not realized.
29 *

--- 42 unchanged lines hidden ---

72struct buf *buf; /* buffer header pool */
73struct swqueue bswlist;
74
75int count_lock_queue __P((void));
76static void vm_hold_free_pages(struct buf * bp, vm_offset_t from,
77 vm_offset_t to);
78static void vm_hold_load_pages(struct buf * bp, vm_offset_t from,
79 vm_offset_t to);
80static void vfs_buf_set_valid(struct buf *bp, vm_ooffset_t foff,
81 vm_offset_t off, vm_offset_t size,
82 vm_page_t m);
83static void vfs_page_set_valid(struct buf *bp, vm_offset_t off, vm_page_t m);
80static void vfs_clean_pages(struct buf * bp);
81static void vfs_setdirty(struct buf *bp);
82static void vfs_vmio_release(struct buf *bp);
83
84int needsbuffer;
85
86/*
87 * Internal update daemon, process 3

--- 408 unchanged lines hidden ---

496 }
497 }
498
499 /*
500 * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer
501 * constituted, so the B_INVAL flag is used to *invalidate* the buffer,
502 * but the VM object is kept around. The B_NOCACHE flag is used to
503 * invalidate the pages in the VM object.
84static void vfs_clean_pages(struct buf * bp);
85static void vfs_setdirty(struct buf *bp);
86static void vfs_vmio_release(struct buf *bp);
87
88int needsbuffer;
89
90/*
91 * Internal update daemon, process 3

--- 408 unchanged lines hidden ---

500 }
501 }
502
503 /*
504 * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer
505 * constituted, so the B_INVAL flag is used to *invalidate* the buffer,
506 * but the VM object is kept around. The B_NOCACHE flag is used to
507 * invalidate the pages in the VM object.
508 *
509 * If the buffer is a partially filled NFS buffer, keep it
510 * since invalidating it now will lose information. The valid
511 * flags in the vm_pages have only DEV_BSIZE resolution but
512 * the b_validoff, b_validend fields have byte resolution.
513 * This can avoid unnecessary re-reads of the buffer.
504 */
514 */
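/*
 * A hypothetical illustration of the resolution mismatch described
 * above, assuming DEV_BSIZE is 512: after a 700-byte NFS read into a
 * fresh buffer, b_validoff = 0 and b_validend = 700, but the page's
 * valid bits can only record whole 512-byte blocks, i.e. at most
 * [0, 512) here.  Invalidating the buffer now would lose track of the
 * valid bytes 512-699 and force them to be re-read from the server;
 * keeping the partially filled buffer preserves the byte-exact range.
 */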
505 if (bp->b_flags & B_VMIO) {
515 if ((bp->b_flags & B_VMIO)
516 && (bp->b_vp->v_tag != VT_NFS
517 || (bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR))
518 || bp->b_validend == 0
519 || (bp->b_validoff == 0
520 && bp->b_validend == bp->b_bufsize))) {
506 vm_ooffset_t foff;
507 vm_object_t obj;
508 int i, resid;
509 vm_page_t m;
510 struct vnode *vp;
511 int iototal = bp->b_bufsize;
512
513 vp = bp->b_vp;

--- 899 unchanged lines hidden ---

1413 obj = vp->v_object;
1414 tinc = PAGE_SIZE;
1415 if (tinc > bsize)
1416 tinc = bsize;
1417 off = (vm_ooffset_t) bp->b_lblkno * bsize;
1418 curbpnpages = bp->b_npages;
1419 doretry:
1420 bp->b_flags |= B_CACHE;
521 vm_ooffset_t foff;
522 vm_object_t obj;
523 int i, resid;
524 vm_page_t m;
525 struct vnode *vp;
526 int iototal = bp->b_bufsize;
527
528 vp = bp->b_vp;

--- 899 unchanged lines hidden ---

1428 obj = vp->v_object;
1429 tinc = PAGE_SIZE;
1430 if (tinc > bsize)
1431 tinc = bsize;
1432 off = (vm_ooffset_t) bp->b_lblkno * bsize;
1433 curbpnpages = bp->b_npages;
1434 doretry:
1435 bp->b_flags |= B_CACHE;
1436 bp->b_validoff = bp->b_validend = 0;
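/*
 * The byte-valid range is reset here; for NFS it is rebuilt in the
 * loop below from the pages' DEV_BSIZE-granular valid bits via
 * vfs_buf_set_valid().
 */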
1421 for (toff = 0; toff < newbsize; toff += tinc) {
1422 int bytesinpage;
1423
1424 pageindex = toff >> PAGE_SHIFT;
1425 objoff = OFF_TO_IDX(off + toff);
1426 if (pageindex < curbpnpages) {
1427
1428 m = bp->b_pages[pageindex];
1429#ifdef VFS_BIO_DIAG
1430 if (m->pindex != objoff)
1431 panic("allocbuf: page changed offset??!!!?");
1432#endif
1433 bytesinpage = tinc;
1434 if (tinc > (newbsize - toff))
1435 bytesinpage = newbsize - toff;
1437 for (toff = 0; toff < newbsize; toff += tinc) {
1438 int bytesinpage;
1439
1440 pageindex = toff >> PAGE_SHIFT;
1441 objoff = OFF_TO_IDX(off + toff);
1442 if (pageindex < curbpnpages) {
1443
1444 m = bp->b_pages[pageindex];
1445#ifdef VFS_BIO_DIAG
1446 if (m->pindex != objoff)
1447 panic("allocbuf: page changed offset??!!!?");
1448#endif
1449 bytesinpage = tinc;
1450 if (tinc > (newbsize - toff))
1451 bytesinpage = newbsize - toff;
1436 if ((bp->b_flags & B_CACHE) &&
1437 !vm_page_is_valid(m,
1438 (vm_offset_t) ((toff + off) & PAGE_MASK),
1439 bytesinpage)) {
1440 bp->b_flags &= ~B_CACHE;
1441 }
1452 if (bp->b_flags & B_CACHE)
1453 vfs_buf_set_valid(bp, off, toff, bytesinpage, m);
1442 continue;
1443 }
1444 m = vm_page_lookup(obj, objoff);
1445 if (!m) {
1446 m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL);
1447 if (!m) {
1448 VM_WAIT;
1449 goto doretry;

--- 19 unchanged lines hidden ---

1469 ((m->queue - m->pc) == PQ_CACHE) &&
1470 ((cnt.v_free_count + cnt.v_cache_count) <
1471 (cnt.v_free_min + cnt.v_cache_min))) {
1472 pagedaemon_wakeup();
1473 }
1474 bytesinpage = tinc;
1475 if (tinc > (newbsize - toff))
1476 bytesinpage = newbsize - toff;
1454 continue;
1455 }
1456 m = vm_page_lookup(obj, objoff);
1457 if (!m) {
1458 m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL);
1459 if (!m) {
1460 VM_WAIT;
1461 goto doretry;

--- 19 unchanged lines hidden ---

1481 ((m->queue - m->pc) == PQ_CACHE) &&
1482 ((cnt.v_free_count + cnt.v_cache_count) <
1483 (cnt.v_free_min + cnt.v_cache_min))) {
1484 pagedaemon_wakeup();
1485 }
1486 bytesinpage = tinc;
1487 if (tinc > (newbsize - toff))
1488 bytesinpage = newbsize - toff;
1477 if ((bp->b_flags & B_CACHE) &&
1478 !vm_page_is_valid(m,
1479 (vm_offset_t) ((toff + off) & PAGE_MASK),
1480 bytesinpage)) {
1481 bp->b_flags &= ~B_CACHE;
1482 }
1489 if (bp->b_flags & B_CACHE)
1490 vfs_buf_set_valid(bp, off, toff, bytesinpage, m);
1483 vm_page_wire(m);
1484 }
1485 bp->b_pages[pageindex] = m;
1486 curbpnpages = pageindex + 1;
1487 }
1491 vm_page_wire(m);
1492 }
1493 bp->b_pages[pageindex] = m;
1494 curbpnpages = pageindex + 1;
1495 }
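/*
 * If no valid byte range could be reconstructed from the pages of an
 * NFS buffer, nothing can be satisfied from the cache, so clear
 * B_CACHE and force a read.
 */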
1496 if (vp->v_tag == VT_NFS && bp->b_validend == 0)
1497 bp->b_flags &= ~B_CACHE;
1488 bp->b_data = (caddr_t) trunc_page(bp->b_data);
1489 bp->b_npages = curbpnpages;
1490 pmap_qenter((vm_offset_t) bp->b_data,
1491 bp->b_pages, bp->b_npages);
1492 ((vm_offset_t) bp->b_data) |= off & PAGE_MASK;
1493 }
1494 }
1495 }

--- 61 unchanged lines hidden ---

1557 if (bp->b_flags & B_CALL) {
1558 bp->b_flags &= ~B_CALL;
1559 (*bp->b_iodone) (bp);
1560 splx(s);
1561 return;
1562 }
1563 if (bp->b_flags & B_VMIO) {
1564 int i, resid;
1498 bp->b_data = (caddr_t) trunc_page(bp->b_data);
1499 bp->b_npages = curbpnpages;
1500 pmap_qenter((vm_offset_t) bp->b_data,
1501 bp->b_pages, bp->b_npages);
1502 ((vm_offset_t) bp->b_data) |= off & PAGE_MASK;
1503 }
1504 }
1505 }

--- 61 unchanged lines hidden ---

1567 if (bp->b_flags & B_CALL) {
1568 bp->b_flags &= ~B_CALL;
1569 (*bp->b_iodone) (bp);
1570 splx(s);
1571 return;
1572 }
1573 if (bp->b_flags & B_VMIO) {
1574 int i, resid;
1565 vm_ooffset_t foff;
1575 vm_ooffset_t foff, bfoff;
1566 vm_page_t m;
1567 vm_object_t obj;
1568 int iosize;
1569 struct vnode *vp = bp->b_vp;
1570
1571 if (vp->v_type == VBLK)
1572 foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno;
1573 else
1574 foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1576 vm_page_t m;
1577 vm_object_t obj;
1578 int iosize;
1579 struct vnode *vp = bp->b_vp;
1580
1581 if (vp->v_type == VBLK)
1582 foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno;
1583 else
1584 foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1585 bfoff = foff;
1575 obj = vp->v_object;
1576 if (!obj) {
1577 panic("biodone: no object");
1578 }
1579#if defined(VFS_BIO_DEBUG)
1580 if (obj->paging_in_progress < bp->b_npages) {
1581 printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
1582 obj->paging_in_progress, bp->b_npages);

--- 25 unchanged lines hidden ---

1608 if (resid > iosize)
1609 resid = iosize;
1610 /*
1611 * In the write case, the valid and clean bits are
1612 * already changed correctly, so we only need to do this
1613 * here in the read case.
1614 */
1615 if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) {
1586 obj = vp->v_object;
1587 if (!obj) {
1588 panic("biodone: no object");
1589 }
1590#if defined(VFS_BIO_DEBUG)
1591 if (obj->paging_in_progress < bp->b_npages) {
1592 printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
1593 obj->paging_in_progress, bp->b_npages);

--- 25 unchanged lines hidden ---

1619 if (resid > iosize)
1620 resid = iosize;
1621 /*
1622 * In the write case, the valid and clean bits are
1623 * already changed correctly, so we only need to do this
1624 * here in the read case.
1625 */
1626 if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) {
1616 vm_page_set_validclean(m,
1617 (vm_offset_t) (foff & PAGE_MASK), resid);
1627 vfs_page_set_valid(bp, foff - bfoff, m);
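/*
 * foff - bfoff is the offset of the current chunk within the
 * buffer, which is the "off" argument vfs_page_set_valid() expects.
 */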
1618 }
1619
1620 /*
1621 * when debugging new filesystems or buffer I/O methods, this
1622 * is the most common error that pops up. if you see this, you
1623 * have not set the page busy flag correctly!!!
1624 */
1625 if (m->busy == 0) {

--- 130 unchanged lines hidden ---

1756 (obj->flags & OBJ_PIPWNT)) {
1757 obj->flags &= ~OBJ_PIPWNT;
1758 wakeup(obj);
1759 }
1760 }
1761}
1762
1763/*
1628 }
1629
1630 /*
1631 * when debugging new filesystems or buffer I/O methods, this
1632 * is the most common error that pops up. if you see this, you
1633 * have not set the page busy flag correctly!!!
1634 */
1635 if (m->busy == 0) {

--- 130 unchanged lines hidden ---

1766 (obj->flags & OBJ_PIPWNT)) {
1767 obj->flags &= ~OBJ_PIPWNT;
1768 wakeup(obj);
1769 }
1770 }
1771}
1772
1773/*
1774 * Set NFS' b_validoff and b_validend fields from the valid bits
1775 * of a page. If the consumer is not NFS, and the page is not
1776 * valid for the entire range, clear the B_CACHE flag to force
1777 * the consumer to re-read the page.
1778 */
1779static void
1780vfs_buf_set_valid(struct buf *bp,
1781 vm_ooffset_t foff, vm_offset_t off, vm_offset_t size,
1782 vm_page_t m)
1783{
1784 if (bp->b_vp->v_tag == VT_NFS) {
1785 vm_offset_t svalid, evalid;
1786 int validbits = m->valid;
1787
1788 /*
1789 * This only bothers with the first valid range in the
1790 * page.
1791 */
1792 svalid = off;
1793 while (validbits && !(validbits & 1)) {
1794 svalid += DEV_BSIZE;
1795 validbits >>= 1;
1796 }
1797 evalid = svalid;
1798 while (validbits & 1) {
1799 evalid += DEV_BSIZE;
1800 validbits >>= 1;
1801 }
1802 /*
1803 * Make sure this range is contiguous with the range
1804 * built up from previous pages. If not, then we will
1805 * just use the range from the previous pages.
1806 */
1807 if (svalid == bp->b_validend) {
1808 bp->b_validoff = min(bp->b_validoff, svalid);
1809 bp->b_validend = max(bp->b_validend, evalid);
1810 }
1811 } else if (!vm_page_is_valid(m,
1812 (vm_offset_t) ((foff + off) & PAGE_MASK),
1813 size)) {
1814 bp->b_flags &= ~B_CACHE;
1815 }
1816}
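/*
 * Standalone sketch (not part of this change) of how the two loops in
 * vfs_buf_set_valid() above turn a page's DEV_BSIZE-granular valid bit
 * mask into a byte range.  Offsets here are relative to the start of
 * the page; the kernel code starts svalid at the page's offset within
 * the buffer (off).  Assumes DEV_BSIZE is 512.
 */
#include <stdio.h>

#define DEV_BSIZE 512

int
main(void)
{
	int validbits = 0x1e;	/* blocks 1-4 of the page are valid */
	unsigned int svalid = 0, evalid;

	/* Skip leading invalid DEV_BSIZE blocks. */
	while (validbits && !(validbits & 1)) {
		svalid += DEV_BSIZE;
		validbits >>= 1;
	}
	/* Extend over the first contiguous run of valid blocks. */
	evalid = svalid;
	while (validbits & 1) {
		evalid += DEV_BSIZE;
		validbits >>= 1;
	}
	/* Prints "valid range: [512, 2560)". */
	printf("valid range: [%u, %u)\n", svalid, evalid);
	return (0);
}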
1817
1818/*
1819 * Set the valid bits in a page, taking care of the b_validoff,
1820 * b_validend fields which NFS uses to optimise small reads. Off is
1821 * the offset of the page within the buf.
1822 */
1823static void
1824vfs_page_set_valid(struct buf *bp, vm_offset_t off, vm_page_t m)
1825{
1826 struct vnode *vp = bp->b_vp;
1827 vm_offset_t soff, eoff;
1828
1829 soff = off;
1830 eoff = min(off + PAGE_SIZE, bp->b_bufsize);
1831 if (vp->v_tag == VT_NFS) {
1832 soff = max((bp->b_validoff + DEV_BSIZE - 1) & -DEV_BSIZE, soff);
1833 eoff = min(bp->b_validend & -DEV_BSIZE, eoff);
1834 }
1835 vm_page_set_invalid(m, 0, PAGE_SIZE);
1836 if (eoff > soff)
1837 vm_page_set_validclean(m, soff, eoff - soff);
1838}
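/*
 * A worked example of the rounding in vfs_page_set_valid() above,
 * assuming DEV_BSIZE is 512 and a single-page buffer with
 * b_validoff = 300, b_validend = b_bufsize = 1300:
 *     soff = (300 + 511) & -512 = 512    (round up to a block boundary)
 *     eoff = 1300 & -512        = 1024   (round down to a block boundary)
 * Only the completely valid block [512, 1024) is marked valid and
 * clean in the page; the partially valid bytes at either end remain
 * described only by b_validoff/b_validend.
 */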
1839
1840/*
1764 * This routine is called before a device strategy routine.
1765 * It is used to tell the VM system that paging I/O is in
1766 * progress, and treat the pages associated with the buffer
1767 * almost as being PG_BUSY. Also the object paging_in_progress
1768 * flag is handled to make sure that the object doesn't become
1769 * inconsistent.
1770 */
1771void
1772vfs_busy_pages(struct buf * bp, int clear_modify)
1773{
1774 int i;
1775
1776 if (bp->b_flags & B_VMIO) {
1777 vm_object_t obj = bp->b_vp->v_object;
1841 * This routine is called before a device strategy routine.
1842 * It is used to tell the VM system that paging I/O is in
1843 * progress, and treat the pages associated with the buffer
1844 * almost as being PG_BUSY. Also the object paging_in_progress
1845 * flag is handled to make sure that the object doesn't become
1846 * inconsistent.
1847 */
1848void
1849vfs_busy_pages(struct buf * bp, int clear_modify)
1850{
1851 int i;
1852
1853 if (bp->b_flags & B_VMIO) {
1854 vm_object_t obj = bp->b_vp->v_object;
1778 vm_ooffset_t foff;
1779 int iocount = bp->b_bufsize;
1855 vm_offset_t off;
1780
1856
1781 if (bp->b_vp->v_type == VBLK)
1782 foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno;
1783 else
1784 foff = (vm_ooffset_t) bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1785 vfs_setdirty(bp);
1857 vfs_setdirty(bp);
1786 for (i = 0; i < bp->b_npages; i++) {
1858 for (i = 0, off = 0; i < bp->b_npages; i++, off += PAGE_SIZE) {
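/*
 * off steps through the buffer a page at a time and is passed to
 * vfs_page_set_valid() as the page's offset within the buffer.
 */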
1787 vm_page_t m = bp->b_pages[i];
1859 vm_page_t m = bp->b_pages[i];
1788 int resid = IDX_TO_OFF(m->pindex + 1) - foff;
1789
1860
1790 if (resid > iocount)
1791 resid = iocount;
1792 if ((bp->b_flags & B_CLUSTER) == 0) {
1793 obj->paging_in_progress++;
1794 m->busy++;
1795 }
1796 vm_page_protect(m, VM_PROT_NONE);
1861 if ((bp->b_flags & B_CLUSTER) == 0) {
1862 obj->paging_in_progress++;
1863 m->busy++;
1864 }
1865 vm_page_protect(m, VM_PROT_NONE);
1797 if (clear_modify) {
1798 vm_page_set_validclean(m,
1799 (vm_offset_t) (foff & PAGE_MASK), resid);
1800 } else if (bp->b_bcount >= PAGE_SIZE) {
1866 if (clear_modify)
1867 vfs_page_set_valid(bp, off, m);
1868 else if (bp->b_bcount >= PAGE_SIZE) {
1801 if (m->valid && (bp->b_flags & B_CACHE) == 0) {
1802 bp->b_pages[i] = bogus_page;
1803 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
1804 }
1805 }
1869 if (m->valid && (bp->b_flags & B_CACHE) == 0) {
1870 bp->b_pages[i] = bogus_page;
1871 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
1872 }
1873 }
1806 foff += resid;
1807 iocount -= resid;
1808 }
1809 }
1810}
1811
1812/*
1813 * Tell the VM system that the pages associated with this buffer
1814 * are clean. This is used for delayed writes where the data is
1815 * going to go to disk eventually without additional VM intervention.
1816 */
1817void
1818vfs_clean_pages(struct buf * bp)
1819{
1820 int i;
1821
1822 if (bp->b_flags & B_VMIO) {
1874 }
1875 }
1876}
1877
1878/*
1879 * Tell the VM system that the pages associated with this buffer
1880 * are clean. This is used for delayed writes where the data is
1881 * going to go to disk eventually without additional VM intervention.
1882 */
1883void
1884vfs_clean_pages(struct buf * bp)
1885{
1886 int i;
1887
1888 if (bp->b_flags & B_VMIO) {
1823 vm_ooffset_t foff;
1824 int iocount = bp->b_bufsize;
1889 vm_offset_t off;
1825
1890
1826 if (bp->b_vp->v_type == VBLK)
1827 foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno;
1828 else
1829 foff = (vm_ooffset_t) bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1830
1831 for (i = 0; i < bp->b_npages; i++) {
1891 for (i = 0, off = 0; i < bp->b_npages; i++, off += PAGE_SIZE) {
1832 vm_page_t m = bp->b_pages[i];
1892 vm_page_t m = bp->b_pages[i];
1833 int resid = IDX_TO_OFF(m->pindex + 1) - foff;
1834
1893
1835 if (resid > iocount)
1836 resid = iocount;
1837 if (resid > 0) {
1838 vm_page_set_validclean(m,
1839 ((vm_offset_t) foff & PAGE_MASK), resid);
1840 }
1841 foff += resid;
1842 iocount -= resid;
1894 vfs_page_set_valid(bp, off, m);
1843 }
1844 }
1845}
1846
1847void
1848vfs_bio_clrbuf(struct buf *bp) {
1849 int i;
1850 if( bp->b_flags & B_VMIO) {

--- 123 unchanged lines hidden ---
1895 }
1896 }
1897}
1898
1899void
1900vfs_bio_clrbuf(struct buf *bp) {
1901 int i;
1902 if( bp->b_flags & B_VMIO) {

--- 123 unchanged lines hidden ---