Deleted Added
full compact
vfs_bio.c (156980) vfs_bio.c (157319)
1/*-
2 * Copyright (c) 2004 Poul-Henning Kamp
3 * Copyright (c) 1994,1997 John S. Dyson
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:

--- 25 unchanged lines hidden (view full) ---

34 * Author: John S. Dyson
35 * Significant help during the development and debugging phases
36 * had been provided by David Greenman, also of the FreeBSD core team.
37 *
38 * see man buf(9) for more info.
39 */
40
41#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2004 Poul-Henning Kamp
3 * Copyright (c) 1994,1997 John S. Dyson
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:

--- 25 unchanged lines hidden (view full) ---

34 * Author: John S. Dyson
35 * Significant help during the development and debugging phases
36 * had been provided by David Greenman, also of the FreeBSD core team.
37 *
38 * see man buf(9) for more info.
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD: head/sys/kern/vfs_bio.c 156980 2006-03-22 00:42:41Z pjd $");
42__FBSDID("$FreeBSD: head/sys/kern/vfs_bio.c 157319 2006-03-31 02:56:30Z jeff $");
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/bio.h>
47#include <sys/conf.h>
48#include <sys/buf.h>
49#include <sys/devicestat.h>
50#include <sys/eventhandler.h>

--- 46 unchanged lines hidden (view full) ---

97 vm_offset_t to);
98static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off,
99 int pageno, vm_page_t m);
100static void vfs_clean_pages(struct buf *bp);
101static void vfs_setdirty(struct buf *bp);
102static void vfs_vmio_release(struct buf *bp);
103static int vfs_bio_clcheck(struct vnode *vp, int size,
104 daddr_t lblkno, daddr_t blkno);
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/bio.h>
47#include <sys/conf.h>
48#include <sys/buf.h>
49#include <sys/devicestat.h>
50#include <sys/eventhandler.h>

--- 46 unchanged lines hidden (view full) ---

97 vm_offset_t to);
98static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off,
99 int pageno, vm_page_t m);
100static void vfs_clean_pages(struct buf *bp);
101static void vfs_setdirty(struct buf *bp);
102static void vfs_vmio_release(struct buf *bp);
103static int vfs_bio_clcheck(struct vnode *vp, int size,
104 daddr_t lblkno, daddr_t blkno);
105static int flushbufqueues(int flushdeps);
105static int flushbufqueues(int, int);
106static void buf_daemon(void);
107static void bremfreel(struct buf *bp);
108
109int vmiodirenable = TRUE;
110SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0,
111 "Use the VM system for directory writes");
112int runningbufspace;
113SYSCTL_INT(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0,

--- 125 unchanged lines hidden (view full) ---

239/*
240 * Lock that protects against bwait()/bdone()/B_DONE races.
241 */
242static struct mtx bpinlock;
243
244/*
245 * Definitions for the buffer free lists.
246 */
106static void buf_daemon(void);
107static void bremfreel(struct buf *bp);
108
109int vmiodirenable = TRUE;
110SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0,
111 "Use the VM system for directory writes");
112int runningbufspace;
113SYSCTL_INT(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0,

--- 125 unchanged lines hidden (view full) ---

239/*
240 * Lock that protects against bwait()/bdone()/B_DONE races.
241 */
242static struct mtx bpinlock;
243
244/*
245 * Definitions for the buffer free lists.
246 */
247#define BUFFER_QUEUES 5 /* number of free buffer queues */
247#define BUFFER_QUEUES 6 /* number of free buffer queues */
248
249#define QUEUE_NONE 0 /* on no queue */
250#define QUEUE_CLEAN 1 /* non-B_DELWRI buffers */
251#define QUEUE_DIRTY 2 /* B_DELWRI buffers */
248
249#define QUEUE_NONE 0 /* on no queue */
250#define QUEUE_CLEAN 1 /* non-B_DELWRI buffers */
251#define QUEUE_DIRTY 2 /* B_DELWRI buffers */
252#define QUEUE_EMPTYKVA 3 /* empty buffer headers w/KVA assignment */
253#define QUEUE_EMPTY 4 /* empty buffer headers */
252#define QUEUE_DIRTY_GIANT 3 /* B_DELWRI buffers that need giant */
253#define QUEUE_EMPTYKVA 4 /* empty buffer headers w/KVA assignment */
254#define QUEUE_EMPTY 5 /* empty buffer headers */
254
255/* Queues for free buffers with various properties */
256static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } };
257
258/* Lock for the bufqueues */
259static struct mtx bqlock;
260
261/*

--- 1089 unchanged lines hidden (view full) ---

1351 bp->b_flags |= B_INVAL;
1352 bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA);
1353 if (bp->b_vflags & BV_BKGRDINPROG)
1354 panic("losing buffer 2");
1355 bp->b_qindex = QUEUE_CLEAN;
1356 TAILQ_INSERT_HEAD(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
1357 /* remaining buffers */
1358 } else {
255
256/* Queues for free buffers with various properties */
257static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } };
258
259/* Lock for the bufqueues */
260static struct mtx bqlock;
261
262/*

--- 1089 unchanged lines hidden (view full) ---

1352 bp->b_flags |= B_INVAL;
1353 bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA);
1354 if (bp->b_vflags & BV_BKGRDINPROG)
1355 panic("losing buffer 2");
1356 bp->b_qindex = QUEUE_CLEAN;
1357 TAILQ_INSERT_HEAD(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
1358 /* remaining buffers */
1359 } else {
1360 if (bp->b_flags & (B_DELWRI|B_NEEDSGIANT))
1361 bp->b_qindex = QUEUE_DIRTY_GIANT;
1359 if (bp->b_flags & B_DELWRI)
1360 bp->b_qindex = QUEUE_DIRTY;
1361 else
1362 bp->b_qindex = QUEUE_CLEAN;
1363 if (bp->b_flags & B_AGE)
1364 TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist);
1365 else
1366 TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist);

--- 74 unchanged lines hidden (view full) ---

1441 mtx_lock(&bqlock);
1442 /* Handle delayed bremfree() processing. */
1443 if (bp->b_flags & B_REMFREE)
1444 bremfreel(bp);
1445 if (bp->b_qindex != QUEUE_NONE)
1446 panic("bqrelse: free buffer onto another queue???");
1447 /* buffers with stale but valid contents */
1448 if (bp->b_flags & B_DELWRI) {
1362 if (bp->b_flags & B_DELWRI)
1363 bp->b_qindex = QUEUE_DIRTY;
1364 else
1365 bp->b_qindex = QUEUE_CLEAN;
1366 if (bp->b_flags & B_AGE)
1367 TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist);
1368 else
1369 TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist);

--- 74 unchanged lines hidden (view full) ---

1444 mtx_lock(&bqlock);
1445 /* Handle delayed bremfree() processing. */
1446 if (bp->b_flags & B_REMFREE)
1447 bremfreel(bp);
1448 if (bp->b_qindex != QUEUE_NONE)
1449 panic("bqrelse: free buffer onto another queue???");
1450 /* buffers with stale but valid contents */
1451 if (bp->b_flags & B_DELWRI) {
1449 bp->b_qindex = QUEUE_DIRTY;
1450 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist);
1452 if (bp->b_flags & B_NEEDSGIANT)
1453 bp->b_qindex = QUEUE_DIRTY_GIANT;
1454 else
1455 bp->b_qindex = QUEUE_DIRTY;
1456 TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist);
1451 } else {
1452 /*
1453 * XXX This lock may not be necessary since BKGRDINPROG
1454 * cannot be set while we hold the buf lock; it can only be
1455 * cleared if it is already pending.
1456 */
1457 BO_LOCK(bp->b_bufobj);
1458 if (!vm_page_count_severe() || bp->b_vflags & BV_BKGRDINPROG) {

--- 537 unchanged lines hidden (view full) ---

1996 buf_daemon,
1997 &bufdaemonproc
1998};
1999SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp)
2000
2001static void
2002buf_daemon()
2003{
1457 } else {
1458 /*
1459 * XXX This lock may not be necessary since BKGRDINPROG
1460 * cannot be set while we hold the buf lock; it can only be
1461 * cleared if it is already pending.
1462 */
1463 BO_LOCK(bp->b_bufobj);
1464 if (!vm_page_count_severe() || bp->b_vflags & BV_BKGRDINPROG) {

--- 537 unchanged lines hidden (view full) ---

2002 buf_daemon,
2003 &bufdaemonproc
2004};
2005SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp)
2006
2007static void
2008buf_daemon()
2009{
2004 mtx_lock(&Giant);
2005
2006 /*
2007 * This process needs to be suspended prior to shutdown sync.
2008 */
2009 EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, bufdaemonproc,
2010 SHUTDOWN_PRI_LAST);
2011
2012 /*

--- 9 unchanged lines hidden (view full) ---

2022
2023 /*
2024 * Do the flush. Limit the amount of in-transit I/O we
2025 * allow to build up, otherwise we would completely saturate
2026 * the I/O system. Wakeup any waiting processes before we
2027 * normally would so they can run in parallel with our drain.
2028 */
2029 while (numdirtybuffers > lodirtybuffers) {
2010
2011 /*
2012 * This process needs to be suspended prior to shutdown sync.
2013 */
2014 EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, bufdaemonproc,
2015 SHUTDOWN_PRI_LAST);
2016
2017 /*

--- 9 unchanged lines hidden (view full) ---

2027
2028 /*
2029 * Do the flush. Limit the amount of in-transit I/O we
2030 * allow to build up, otherwise we would completely saturate
2031 * the I/O system. Wakeup any waiting processes before we
2032 * normally would so they can run in parallel with our drain.
2033 */
2034 while (numdirtybuffers > lodirtybuffers) {
2030 if (flushbufqueues(0) == 0) {
2035 int flushed;
2036
2037 flushed = flushbufqueues(QUEUE_DIRTY, 0);
2038 /* The list empty check here is slightly racy */
2039 if (!TAILQ_EMPTY(&bufqueues[QUEUE_DIRTY_GIANT])) {
2040 mtx_lock(&Giant);
2041 flushed += flushbufqueues(QUEUE_DIRTY_GIANT, 0);
2042 mtx_unlock(&Giant);
2043 }
2044 if (flushed == 0) {
2031 /*
2032 * Could not find any buffers without rollback
2033 * dependencies, so just write the first one
2034 * in the hopes of eventually making progress.
2035 */
2045 /*
2046 * Could not find any buffers without rollback
2047 * dependencies, so just write the first one
2048 * in the hopes of eventually making progress.
2049 */
2036 flushbufqueues(1);
2050 flushbufqueues(QUEUE_DIRTY, 1);
2051 if (!TAILQ_EMPTY(
2052 &bufqueues[QUEUE_DIRTY_GIANT])) {
2053 mtx_lock(&Giant);
2054 flushbufqueues(QUEUE_DIRTY_GIANT, 1);
2055 mtx_unlock(&Giant);
2056 }
2037 break;
2038 }
2039 uio_yield();
2040 }
2041
2042 /*
2043 * Only clear bd_request if we have reached our low water
2044 * mark. The buf_daemon normally waits 1 second and

--- 31 unchanged lines hidden (view full) ---

2076 * free up B_INVAL buffers instead of write them, which NFS is
2077 * particularly sensitive to.
2078 */
2079static int flushwithdeps = 0;
2080SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps,
2081 0, "Number of buffers flushed with dependecies that require rollbacks");
2082
2083static int
2057 break;
2058 }
2059 uio_yield();
2060 }
2061
2062 /*
2063 * Only clear bd_request if we have reached our low water
2064 * mark. The buf_daemon normally waits 1 second and

--- 31 unchanged lines hidden (view full) ---

2096 * free up B_INVAL buffers instead of write them, which NFS is
2097 * particularly sensitive to.
2098 */
2099static int flushwithdeps = 0;
2100SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps,
2101 0, "Number of buffers flushed with dependecies that require rollbacks");
2102
2103static int
2084flushbufqueues(int flushdeps)
2104flushbufqueues(int queue, int flushdeps)
2085{
2086 struct thread *td = curthread;
2087 struct buf sentinel;
2088 struct vnode *vp;
2089 struct mount *mp;
2090 struct buf *bp;
2091 int hasdeps;
2092 int flushed;
2093 int target;
2094
2095 target = numdirtybuffers - lodirtybuffers;
2096 if (flushdeps && target > 2)
2097 target /= 2;
2098 flushed = 0;
2099 bp = NULL;
2100 mtx_lock(&bqlock);
2105{
2106 struct thread *td = curthread;
2107 struct buf sentinel;
2108 struct vnode *vp;
2109 struct mount *mp;
2110 struct buf *bp;
2111 int hasdeps;
2112 int flushed;
2113 int target;
2114
2115 target = numdirtybuffers - lodirtybuffers;
2116 if (flushdeps && target > 2)
2117 target /= 2;
2118 flushed = 0;
2119 bp = NULL;
2120 mtx_lock(&bqlock);
2101 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], &sentinel, b_freelist);
2121 TAILQ_INSERT_TAIL(&bufqueues[queue], &sentinel, b_freelist);
2102 while (flushed != target) {
2122 while (flushed != target) {
2103 bp = TAILQ_FIRST(&bufqueues[QUEUE_DIRTY]);
2123 bp = TAILQ_FIRST(&bufqueues[queue]);
2104 if (bp == &sentinel)
2105 break;
2124 if (bp == &sentinel)
2125 break;
2106 TAILQ_REMOVE(&bufqueues[QUEUE_DIRTY], bp, b_freelist);
2107 TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist);
2126 TAILQ_REMOVE(&bufqueues[queue], bp, b_freelist);
2127 TAILQ_INSERT_TAIL(&bufqueues[queue], bp, b_freelist);
2108
2109 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
2110 continue;
2111 if (bp->b_pin_count > 0) {
2112 BUF_UNLOCK(bp);
2113 continue;
2114 }
2115 BO_LOCK(bp->b_bufobj);

--- 49 unchanged lines hidden (view full) ---

2165 waitrunningbufspace();
2166 numdirtywakeup((lodirtybuffers + hidirtybuffers) / 2);
2167 mtx_lock(&bqlock);
2168 continue;
2169 }
2170 vn_finished_write(mp);
2171 BUF_UNLOCK(bp);
2172 }
2128
2129 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
2130 continue;
2131 if (bp->b_pin_count > 0) {
2132 BUF_UNLOCK(bp);
2133 continue;
2134 }
2135 BO_LOCK(bp->b_bufobj);

--- 49 unchanged lines hidden (view full) ---

2185 waitrunningbufspace();
2186 numdirtywakeup((lodirtybuffers + hidirtybuffers) / 2);
2187 mtx_lock(&bqlock);
2188 continue;
2189 }
2190 vn_finished_write(mp);
2191 BUF_UNLOCK(bp);
2192 }
2173 TAILQ_REMOVE(&bufqueues[QUEUE_DIRTY], &sentinel, b_freelist);
2193 TAILQ_REMOVE(&bufqueues[queue], &sentinel, b_freelist);
2174 mtx_unlock(&bqlock);
2175 return (flushed);
2176}
2177
2178/*
2179 * Check to see if a block is currently memory resident.
2180 */
2181struct buf *

--- 388 unchanged lines hidden (view full) ---

2570 }
2571
2572 /*
2573 * Insert the buffer into the hash, so that it can
2574 * be found by incore.
2575 */
2576 bp->b_blkno = bp->b_lblkno = blkno;
2577 bp->b_offset = offset;
2194 mtx_unlock(&bqlock);
2195 return (flushed);
2196}
2197
2198/*
2199 * Check to see if a block is currently memory resident.
2200 */
2201struct buf *

--- 388 unchanged lines hidden (view full) ---

2590 }
2591
2592 /*
2593 * Insert the buffer into the hash, so that it can
2594 * be found by incore.
2595 */
2596 bp->b_blkno = bp->b_lblkno = blkno;
2597 bp->b_offset = offset;
2578
2579 bgetvp(vp, bp);
2580 BO_UNLOCK(bo);
2581
2582 /*
2583 * Set the B_VMIO bit and grow the buffer with allocbuf(). Since the
2584 * buffer size starts out as 0, B_CACHE will be set by
2585 * allocbuf() for the VMIO case prior to it testing the
2586 * backing store for validity.

--- 1314 unchanged lines hidden ---
2598 bgetvp(vp, bp);
2599 BO_UNLOCK(bo);
2600
2601 /*
2602 * Set the B_VMIO bit and grow the buffer with allocbuf(). Since the
2603 * buffer size starts out as 0, B_CACHE will be set by
2604 * allocbuf() for the VMIO case prior to it testing the
2605 * backing store for validity.

--- 1314 unchanged lines hidden ---