vfs_bio.c (25649) | vfs_bio.c (25930) |
---|---|
1/* 2 * Copyright (c) 1994 John S. Dyson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 4 unchanged lines hidden (view full) --- 13 * documentation and/or other materials provided with the distribution. 14 * 3. Absolutely no warranty of function or purpose is made by the author 15 * John S. Dyson. 16 * 4. This work was done expressly for inclusion into FreeBSD. Other use 17 * is allowed if this notation is included. 18 * 5. Modifications may be freely made to this file if the above conditions 19 * are met. 20 * | 1/* 2 * Copyright (c) 1994 John S. Dyson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 4 unchanged lines hidden (view full) --- 13 * documentation and/or other materials provided with the distribution. 14 * 3. Absolutely no warranty of function or purpose is made by the author 15 * John S. Dyson. 16 * 4. This work was done expressly for inclusion into FreeBSD. Other use 17 * is allowed if this notation is included. 18 * 5. Modifications may be freely made to this file if the above conditions 19 * are met. 20 * |
21 * $Id: vfs_bio.c,v 1.114 1997/04/13 03:33:25 dyson Exp $ | 21 * $Id: vfs_bio.c,v 1.115 1997/05/10 09:09:42 joerg Exp $ |
22 */ 23 24/* 25 * this file contains a new buffer I/O scheme implementing a coherent 26 * VM object and buffer cache scheme. Pains have been taken to make 27 * sure that the performance degradation associated with schemes such 28 * as this is not realized. 29 * --- 42 unchanged lines hidden (view full) --- 72struct buf *buf; /* buffer header pool */ 73struct swqueue bswlist; 74 75int count_lock_queue __P((void)); 76static void vm_hold_free_pages(struct buf * bp, vm_offset_t from, 77 vm_offset_t to); 78static void vm_hold_load_pages(struct buf * bp, vm_offset_t from, 79 vm_offset_t to); | 22 */ 23 24/* 25 * this file contains a new buffer I/O scheme implementing a coherent 26 * VM object and buffer cache scheme. Pains have been taken to make 27 * sure that the performance degradation associated with schemes such 28 * as this is not realized. 29 * --- 42 unchanged lines hidden (view full) --- 72struct buf *buf; /* buffer header pool */ 73struct swqueue bswlist; 74 75int count_lock_queue __P((void)); 76static void vm_hold_free_pages(struct buf * bp, vm_offset_t from, 77 vm_offset_t to); 78static void vm_hold_load_pages(struct buf * bp, vm_offset_t from, 79 vm_offset_t to); |
80static void vfs_buf_set_valid(struct buf *bp, vm_ooffset_t foff, 81 vm_offset_t off, vm_offset_t size, 82 vm_page_t m); 83static void vfs_page_set_valid(struct buf *bp, vm_offset_t off, vm_page_t m); |
|
80static void vfs_clean_pages(struct buf * bp); 81static void vfs_setdirty(struct buf *bp); 82static void vfs_vmio_release(struct buf *bp); 83 84int needsbuffer; 85 86/* 87 * Internal update daemon, process 3 --- 408 unchanged lines hidden (view full) --- 496 } 497 } 498 499 /* 500 * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer 501 * constituted, so the B_INVAL flag is used to *invalidate* the buffer, 502 * but the VM object is kept around. The B_NOCACHE flag is used to 503 * invalidate the pages in the VM object. | 84static void vfs_clean_pages(struct buf * bp); 85static void vfs_setdirty(struct buf *bp); 86static void vfs_vmio_release(struct buf *bp); 87 88int needsbuffer; 89 90/* 91 * Internal update daemon, process 3 --- 408 unchanged lines hidden (view full) --- 500 } 501 } 502 503 /* 504 * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer 505 * constituted, so the B_INVAL flag is used to *invalidate* the buffer, 506 * but the VM object is kept around. The B_NOCACHE flag is used to 507 * invalidate the pages in the VM object. |
508 * 509 * If the buffer is a partially filled NFS buffer, keep it 510 * since invalidating it now will lose information. The valid 511 * flags in the vm_pages have only DEV_BSIZE resolution but 512 * the b_validoff, b_validend fields have byte resolution. 513 * This can avoid unnecessary re-reads of the buffer. |
|
504 */ | 514 */ |
505 if (bp->b_flags & B_VMIO) { | 515 if ((bp->b_flags & B_VMIO) 516 && (bp->b_vp->v_tag != VT_NFS 517 || (bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) 518 || bp->b_validend == 0 519 || (bp->b_validoff == 0 520 && bp->b_validend == bp->b_bufsize))) { |
506 vm_ooffset_t foff; 507 vm_object_t obj; 508 int i, resid; 509 vm_page_t m; 510 struct vnode *vp; 511 int iototal = bp->b_bufsize; 512 513 vp = bp->b_vp; --- 899 unchanged lines hidden (view full) --- 1413 obj = vp->v_object; 1414 tinc = PAGE_SIZE; 1415 if (tinc > bsize) 1416 tinc = bsize; 1417 off = (vm_ooffset_t) bp->b_lblkno * bsize; 1418 curbpnpages = bp->b_npages; 1419 doretry: 1420 bp->b_flags |= B_CACHE; | 521 vm_ooffset_t foff; 522 vm_object_t obj; 523 int i, resid; 524 vm_page_t m; 525 struct vnode *vp; 526 int iototal = bp->b_bufsize; 527 528 vp = bp->b_vp; --- 899 unchanged lines hidden (view full) --- 1428 obj = vp->v_object; 1429 tinc = PAGE_SIZE; 1430 if (tinc > bsize) 1431 tinc = bsize; 1432 off = (vm_ooffset_t) bp->b_lblkno * bsize; 1433 curbpnpages = bp->b_npages; 1434 doretry: 1435 bp->b_flags |= B_CACHE; |
1436 bp->b_validoff = bp->b_validend = 0; |
|
1421 for (toff = 0; toff < newbsize; toff += tinc) { 1422 int bytesinpage; 1423 1424 pageindex = toff >> PAGE_SHIFT; 1425 objoff = OFF_TO_IDX(off + toff); 1426 if (pageindex < curbpnpages) { 1427 1428 m = bp->b_pages[pageindex]; 1429#ifdef VFS_BIO_DIAG 1430 if (m->pindex != objoff) 1431 panic("allocbuf: page changed offset??!!!?"); 1432#endif 1433 bytesinpage = tinc; 1434 if (tinc > (newbsize - toff)) 1435 bytesinpage = newbsize - toff; | 1437 for (toff = 0; toff < newbsize; toff += tinc) { 1438 int bytesinpage; 1439 1440 pageindex = toff >> PAGE_SHIFT; 1441 objoff = OFF_TO_IDX(off + toff); 1442 if (pageindex < curbpnpages) { 1443 1444 m = bp->b_pages[pageindex]; 1445#ifdef VFS_BIO_DIAG 1446 if (m->pindex != objoff) 1447 panic("allocbuf: page changed offset??!!!?"); 1448#endif 1449 bytesinpage = tinc; 1450 if (tinc > (newbsize - toff)) 1451 bytesinpage = newbsize - toff; |
1436 if ((bp->b_flags & B_CACHE) && 1437 !vm_page_is_valid(m, 1438 (vm_offset_t) ((toff + off) & PAGE_MASK), 1439 bytesinpage)) { 1440 bp->b_flags &= ~B_CACHE; 1441 } | 1452 if (bp->b_flags & B_CACHE) 1453 vfs_buf_set_valid(bp, off, toff, bytesinpage, m); |
1442 continue; 1443 } 1444 m = vm_page_lookup(obj, objoff); 1445 if (!m) { 1446 m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL); 1447 if (!m) { 1448 VM_WAIT; 1449 goto doretry; --- 19 unchanged lines hidden (view full) --- 1469 ((m->queue - m->pc) == PQ_CACHE) && 1470 ((cnt.v_free_count + cnt.v_cache_count) < 1471 (cnt.v_free_min + cnt.v_cache_min))) { 1472 pagedaemon_wakeup(); 1473 } 1474 bytesinpage = tinc; 1475 if (tinc > (newbsize - toff)) 1476 bytesinpage = newbsize - toff; | 1454 continue; 1455 } 1456 m = vm_page_lookup(obj, objoff); 1457 if (!m) { 1458 m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL); 1459 if (!m) { 1460 VM_WAIT; 1461 goto doretry; --- 19 unchanged lines hidden (view full) --- 1481 ((m->queue - m->pc) == PQ_CACHE) && 1482 ((cnt.v_free_count + cnt.v_cache_count) < 1483 (cnt.v_free_min + cnt.v_cache_min))) { 1484 pagedaemon_wakeup(); 1485 } 1486 bytesinpage = tinc; 1487 if (tinc > (newbsize - toff)) 1488 bytesinpage = newbsize - toff; |
1477 if ((bp->b_flags & B_CACHE) && 1478 !vm_page_is_valid(m, 1479 (vm_offset_t) ((toff + off) & PAGE_MASK), 1480 bytesinpage)) { 1481 bp->b_flags &= ~B_CACHE; 1482 } | 1489 if (bp->b_flags & B_CACHE) 1490 vfs_buf_set_valid(bp, off, toff, bytesinpage, m); |
1483 vm_page_wire(m); 1484 } 1485 bp->b_pages[pageindex] = m; 1486 curbpnpages = pageindex + 1; 1487 } | 1491 vm_page_wire(m); 1492 } 1493 bp->b_pages[pageindex] = m; 1494 curbpnpages = pageindex + 1; 1495 } |
1496 if (vp->v_tag == VT_NFS && bp->b_validend == 0) 1497 bp->b_flags &= ~B_CACHE; |
|
1488 bp->b_data = (caddr_t) trunc_page(bp->b_data); 1489 bp->b_npages = curbpnpages; 1490 pmap_qenter((vm_offset_t) bp->b_data, 1491 bp->b_pages, bp->b_npages); 1492 ((vm_offset_t) bp->b_data) |= off & PAGE_MASK; 1493 } 1494 } 1495 } --- 61 unchanged lines hidden (view full) --- 1557 if (bp->b_flags & B_CALL) { 1558 bp->b_flags &= ~B_CALL; 1559 (*bp->b_iodone) (bp); 1560 splx(s); 1561 return; 1562 } 1563 if (bp->b_flags & B_VMIO) { 1564 int i, resid; | 1498 bp->b_data = (caddr_t) trunc_page(bp->b_data); 1499 bp->b_npages = curbpnpages; 1500 pmap_qenter((vm_offset_t) bp->b_data, 1501 bp->b_pages, bp->b_npages); 1502 ((vm_offset_t) bp->b_data) |= off & PAGE_MASK; 1503 } 1504 } 1505 } --- 61 unchanged lines hidden (view full) --- 1567 if (bp->b_flags & B_CALL) { 1568 bp->b_flags &= ~B_CALL; 1569 (*bp->b_iodone) (bp); 1570 splx(s); 1571 return; 1572 } 1573 if (bp->b_flags & B_VMIO) { 1574 int i, resid; |
1565 vm_ooffset_t foff; | 1575 vm_ooffset_t foff, bfoff; |
1566 vm_page_t m; 1567 vm_object_t obj; 1568 int iosize; 1569 struct vnode *vp = bp->b_vp; 1570 1571 if (vp->v_type == VBLK) 1572 foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; 1573 else 1574 foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; | 1576 vm_page_t m; 1577 vm_object_t obj; 1578 int iosize; 1579 struct vnode *vp = bp->b_vp; 1580 1581 if (vp->v_type == VBLK) 1582 foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; 1583 else 1584 foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; |
1585 bfoff = foff; |
|
1575 obj = vp->v_object; 1576 if (!obj) { 1577 panic("biodone: no object"); 1578 } 1579#if defined(VFS_BIO_DEBUG) 1580 if (obj->paging_in_progress < bp->b_npages) { 1581 printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", 1582 obj->paging_in_progress, bp->b_npages); --- 25 unchanged lines hidden (view full) --- 1608 if (resid > iosize) 1609 resid = iosize; 1610 /* 1611 * In the write case, the valid and clean bits are 1612 * already changed correctly, so we only need to do this 1613 * here in the read case. 1614 */ 1615 if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) { | 1586 obj = vp->v_object; 1587 if (!obj) { 1588 panic("biodone: no object"); 1589 } 1590#if defined(VFS_BIO_DEBUG) 1591 if (obj->paging_in_progress < bp->b_npages) { 1592 printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", 1593 obj->paging_in_progress, bp->b_npages); --- 25 unchanged lines hidden (view full) --- 1619 if (resid > iosize) 1620 resid = iosize; 1621 /* 1622 * In the write case, the valid and clean bits are 1623 * already changed correctly, so we only need to do this 1624 * here in the read case. 1625 */ 1626 if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) { |
1616 vm_page_set_validclean(m, 1617 (vm_offset_t) (foff & PAGE_MASK), resid); | 1627 vfs_page_set_valid(bp, foff - bfoff, m); |
1618 } 1619 1620 /* 1621 * when debugging new filesystems or buffer I/O methods, this 1622 * is the most common error that pops up. if you see this, you 1623 * have not set the page busy flag correctly!!! 1624 */ 1625 if (m->busy == 0) { --- 130 unchanged lines hidden (view full) --- 1756 (obj->flags & OBJ_PIPWNT)) { 1757 obj->flags &= ~OBJ_PIPWNT; 1758 wakeup(obj); 1759 } 1760 } 1761} 1762 1763/* | 1628 } 1629 1630 /* 1631 * when debugging new filesystems or buffer I/O methods, this 1632 * is the most common error that pops up. if you see this, you 1633 * have not set the page busy flag correctly!!! 1634 */ 1635 if (m->busy == 0) { --- 130 unchanged lines hidden (view full) --- 1766 (obj->flags & OBJ_PIPWNT)) { 1767 obj->flags &= ~OBJ_PIPWNT; 1768 wakeup(obj); 1769 } 1770 } 1771} 1772 1773/* |
1774 * Set NFS' b_validoff and b_validend fields from the valid bits 1775 * of a page. If the consumer is not NFS, and the page is not 1776 * valid for the entire range, clear the B_CACHE flag to force 1777 * the consumer to re-read the page. 1778 */ 1779static void 1780vfs_buf_set_valid(struct buf *bp, 1781 vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, 1782 vm_page_t m) 1783{ 1784 if (bp->b_vp->v_tag == VT_NFS) { 1785 vm_offset_t svalid, evalid; 1786 int validbits = m->valid; 1787 1788 /* 1789 * This only bothers with the first valid range in the 1790 * page. 1791 */ 1792 svalid = off; 1793 while (validbits && !(validbits & 1)) { 1794 svalid += DEV_BSIZE; 1795 validbits >>= 1; 1796 } 1797 evalid = svalid; 1798 while (validbits & 1) { 1799 evalid += DEV_BSIZE; 1800 validbits >>= 1; 1801 } 1802 /* 1803 * Make sure this range is contiguous with the range 1804 * built up from previous pages. If not, then we will 1805 * just use the range from the previous pages. 1806 */ 1807 if (svalid == bp->b_validend) { 1808 bp->b_validoff = min(bp->b_validoff, svalid); 1809 bp->b_validend = max(bp->b_validend, evalid); 1810 } 1811 } else if (!vm_page_is_valid(m, 1812 (vm_offset_t) ((foff + off) & PAGE_MASK), 1813 size)) { 1814 bp->b_flags &= ~B_CACHE; 1815 } 1816} 1817 1818/* 1819 * Set the valid bits in a page, taking care of the b_validoff, 1820 * b_validend fields which NFS uses to optimise small reads. Off is 1821 * the offset of the page within the buf. 
1822 */ 1823static void 1824vfs_page_set_valid(struct buf *bp, vm_offset_t off, vm_page_t m) 1825{ 1826 struct vnode *vp = bp->b_vp; 1827 vm_offset_t soff, eoff; 1828 1829 soff = off; 1830 eoff = min(off + PAGE_SIZE, bp->b_bufsize); 1831 if (vp->v_tag == VT_NFS) { 1832 soff = max((bp->b_validoff + DEV_BSIZE - 1) & -DEV_BSIZE, soff); 1833 eoff = min(bp->b_validend & -DEV_BSIZE, eoff); 1834 } 1835 vm_page_set_invalid(m, 0, PAGE_SIZE); 1836 if (eoff > soff) 1837 vm_page_set_validclean(m, soff, eoff - soff); 1838} 1839 1840/* |
|
1764 * This routine is called before a device strategy routine. 1765 * It is used to tell the VM system that paging I/O is in 1766 * progress, and treat the pages associated with the buffer 1767 * almost as being PG_BUSY. Also the object paging_in_progress 1768 * flag is handled to make sure that the object doesn't become 1769 * inconsistant. 1770 */ 1771void 1772vfs_busy_pages(struct buf * bp, int clear_modify) 1773{ 1774 int i; 1775 1776 if (bp->b_flags & B_VMIO) { 1777 vm_object_t obj = bp->b_vp->v_object; | 1841 * This routine is called before a device strategy routine. 1842 * It is used to tell the VM system that paging I/O is in 1843 * progress, and treat the pages associated with the buffer 1844 * almost as being PG_BUSY. Also the object paging_in_progress 1845 * flag is handled to make sure that the object doesn't become 1846 * inconsistant. 1847 */ 1848void 1849vfs_busy_pages(struct buf * bp, int clear_modify) 1850{ 1851 int i; 1852 1853 if (bp->b_flags & B_VMIO) { 1854 vm_object_t obj = bp->b_vp->v_object; |
1778 vm_ooffset_t foff; 1779 int iocount = bp->b_bufsize; | 1855 vm_offset_t off; |
1780 | 1856 |
1781 if (bp->b_vp->v_type == VBLK) 1782 foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; 1783 else 1784 foff = (vm_ooffset_t) bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; | |
1785 vfs_setdirty(bp); | 1857 vfs_setdirty(bp); |
1786 for (i = 0; i < bp->b_npages; i++) { | 1858 for (i = 0, off = 0; i < bp->b_npages; i++, off += PAGE_SIZE) { |
1787 vm_page_t m = bp->b_pages[i]; | 1859 vm_page_t m = bp->b_pages[i]; |
1788 int resid = IDX_TO_OFF(m->pindex + 1) - foff; | |
1789 | 1860 |
1790 if (resid > iocount) 1791 resid = iocount; | |
1792 if ((bp->b_flags & B_CLUSTER) == 0) { 1793 obj->paging_in_progress++; 1794 m->busy++; 1795 } 1796 vm_page_protect(m, VM_PROT_NONE); | 1861 if ((bp->b_flags & B_CLUSTER) == 0) { 1862 obj->paging_in_progress++; 1863 m->busy++; 1864 } 1865 vm_page_protect(m, VM_PROT_NONE); |
1797 if (clear_modify) { 1798 vm_page_set_validclean(m, 1799 (vm_offset_t) (foff & PAGE_MASK), resid); 1800 } else if (bp->b_bcount >= PAGE_SIZE) { | 1866 if (clear_modify) 1867 vfs_page_set_valid(bp, off, m); 1868 else if (bp->b_bcount >= PAGE_SIZE) { |
1801 if (m->valid && (bp->b_flags & B_CACHE) == 0) { 1802 bp->b_pages[i] = bogus_page; 1803 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); 1804 } 1805 } | 1869 if (m->valid && (bp->b_flags & B_CACHE) == 0) { 1870 bp->b_pages[i] = bogus_page; 1871 pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); 1872 } 1873 } |
1806 foff += resid; 1807 iocount -= resid; | |
1808 } 1809 } 1810} 1811 1812/* 1813 * Tell the VM system that the pages associated with this buffer 1814 * are clean. This is used for delayed writes where the data is 1815 * going to go to disk eventually without additional VM intevention. 1816 */ 1817void 1818vfs_clean_pages(struct buf * bp) 1819{ 1820 int i; 1821 1822 if (bp->b_flags & B_VMIO) { | 1874 } 1875 } 1876} 1877 1878/* 1879 * Tell the VM system that the pages associated with this buffer 1880 * are clean. This is used for delayed writes where the data is 1881 * going to go to disk eventually without additional VM intevention. 1882 */ 1883void 1884vfs_clean_pages(struct buf * bp) 1885{ 1886 int i; 1887 1888 if (bp->b_flags & B_VMIO) { |
1823 vm_ooffset_t foff; 1824 int iocount = bp->b_bufsize; | 1889 vm_offset_t off; |
1825 | 1890 |
1826 if (bp->b_vp->v_type == VBLK) 1827 foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; 1828 else 1829 foff = (vm_ooffset_t) bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; 1830 1831 for (i = 0; i < bp->b_npages; i++) { | 1891 for (i = 0, off = 0; i < bp->b_npages; i++, off += PAGE_SIZE) { |
1832 vm_page_t m = bp->b_pages[i]; | 1892 vm_page_t m = bp->b_pages[i]; |
1833 int resid = IDX_TO_OFF(m->pindex + 1) - foff; | |
1834 | 1893 |
1835 if (resid > iocount) 1836 resid = iocount; 1837 if (resid > 0) { 1838 vm_page_set_validclean(m, 1839 ((vm_offset_t) foff & PAGE_MASK), resid); 1840 } 1841 foff += resid; 1842 iocount -= resid; | 1894 vfs_page_set_valid(bp, off, m); |
1843 } 1844 } 1845} 1846 1847void 1848vfs_bio_clrbuf(struct buf *bp) { 1849 int i; 1850 if( bp->b_flags & B_VMIO) { --- 123 unchanged lines hidden --- | 1895 } 1896 } 1897} 1898 1899void 1900vfs_bio_clrbuf(struct buf *bp) { 1901 int i; 1902 if( bp->b_flags & B_VMIO) { --- 123 unchanged lines hidden --- |