file_cache.cpp revision 7b8683b2
/*
 * Copyright 2004-2008, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */
5f72376a8SAxel Dörfler
6f72376a8SAxel Dörfler
7f72376a8SAxel Dörfler#include "vnode_store.h"
8f72376a8SAxel Dörfler
90d871d3cSAxel Dörfler#include <unistd.h>
100d871d3cSAxel Dörfler#include <stdlib.h>
110d871d3cSAxel Dörfler#include <string.h>
120d871d3cSAxel Dörfler
13f72376a8SAxel Dörfler#include <KernelExport.h>
14f72376a8SAxel Dörfler#include <fs_cache.h>
15f72376a8SAxel Dörfler
16279c6b76SIngo Weinhold#include <condition_variable.h>
17f72376a8SAxel Dörfler#include <file_cache.h>
18279c6b76SIngo Weinhold#include <generic_syscall.h>
195c99d639SIngo Weinhold#include <low_resource_manager.h>
207b8683b2SIngo Weinhold#include <thread.h>
210d871d3cSAxel Dörfler#include <util/AutoLock.h>
22279c6b76SIngo Weinhold#include <util/kernel_cpp.h>
23f72376a8SAxel Dörfler#include <vfs.h>
24f72376a8SAxel Dörfler#include <vm.h>
25f72376a8SAxel Dörfler#include <vm_page.h>
26f72376a8SAxel Dörfler#include <vm_cache.h>
27f72376a8SAxel Dörfler
287f12cc54SIngo Weinhold#include "io_requests.h"
297f12cc54SIngo Weinhold
30f72376a8SAxel Dörfler
31f72376a8SAxel Dörfler//#define TRACE_FILE_CACHE
32f72376a8SAxel Dörfler#ifdef TRACE_FILE_CACHE
33f72376a8SAxel Dörfler#	define TRACE(x) dprintf x
34f72376a8SAxel Dörfler#else
35f72376a8SAxel Dörfler#	define TRACE(x) ;
36f72376a8SAxel Dörfler#endif
37f72376a8SAxel Dörfler
380f6c560eSAxel Dörfler// maximum number of iovecs per request
39279c6b76SIngo Weinhold#define MAX_IO_VECS			32	// 128 kB
400f6c560eSAxel Dörfler#define MAX_FILE_IO_VECS	32
4111a3346cSAxel Dörfler
42cfe386c2SAxel Dörfler#define BYPASS_IO_SIZE		65536
43c6573329SAxel Dörfler#define LAST_ACCESSES		3
44c6573329SAxel Dörfler
45f72376a8SAxel Dörflerstruct file_cache_ref {
4658f6e8e5SAxel Dörfler	vm_cache		*cache;
4780f54692SAxel Dörfler	struct vnode	*vnode;
48c6573329SAxel Dörfler	off_t			last_access[LAST_ACCESSES];
49c6573329SAxel Dörfler		// TODO: it would probably be enough to only store the least
50c6573329SAxel Dörfler		//	significant 31 bits, and make this uint32 (one bit for
51c6573329SAxel Dörfler		//	write vs. read)
52c6573329SAxel Dörfler	int32			last_access_index;
537491000fSIngo Weinhold	uint16			disabled_count;
54c6573329SAxel Dörfler	bool			last_access_was_write;
55f72376a8SAxel Dörfler};
56f72376a8SAxel Dörfler
573d268edaSAxel Dörflertypedef status_t (*cache_func)(file_cache_ref *ref, void *cookie, off_t offset,
58a121b8c8SAxel Dörfler	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
5909149281SAxel Dörfler	size_t lastReservedPages, size_t reservePages);
6009149281SAxel Dörfler
61f72376a8SAxel Dörfler
62324fc66bSAxel Dörflerstatic struct cache_module_info *sCacheModule;
637491000fSIngo Weinholdstatic const uint8 kZeroBuffer[4096] = {};
64324fc66bSAxel Dörfler
65324fc66bSAxel Dörfler
6611a3346cSAxel Dörfler//	#pragma mark -
6711a3346cSAxel Dörfler
6811a3346cSAxel Dörfler
69f72376a8SAxel Dörflerstatic void
70f72376a8SAxel Dörfleradd_to_iovec(iovec *vecs, int32 &index, int32 max, addr_t address, size_t size)
71f72376a8SAxel Dörfler{
72b50494aaSAxel Dörfler	if (index > 0 && (addr_t)vecs[index - 1].iov_base
73b50494aaSAxel Dörfler			+ vecs[index - 1].iov_len == address) {
74f72376a8SAxel Dörfler		// the iovec can be combined with the previous one
75f72376a8SAxel Dörfler		vecs[index - 1].iov_len += size;
76f72376a8SAxel Dörfler		return;
77f72376a8SAxel Dörfler	}
78f72376a8SAxel Dörfler
79139353cfSAxel Dörfler	if (index == max)
80139353cfSAxel Dörfler		panic("no more space for iovecs!");
81139353cfSAxel Dörfler
82f72376a8SAxel Dörfler	// we need to start a new iovec
83f72376a8SAxel Dörfler	vecs[index].iov_base = (void *)address;
84f72376a8SAxel Dörfler	vecs[index].iov_len = size;
85f72376a8SAxel Dörfler	index++;
86f72376a8SAxel Dörfler}
87f72376a8SAxel Dörfler
88f72376a8SAxel Dörfler
89c6573329SAxel Dörflerstatic inline bool
90c6573329SAxel Dörfleraccess_is_sequential(file_cache_ref *ref)
91c6573329SAxel Dörfler{
92c6573329SAxel Dörfler	return ref->last_access[ref->last_access_index] != 0;
93c6573329SAxel Dörfler}
94c6573329SAxel Dörfler
95c6573329SAxel Dörfler
96c6573329SAxel Dörflerstatic inline void
97c6573329SAxel Dörflerpush_access(file_cache_ref *ref, off_t offset, size_t bytes, bool isWrite)
98c6573329SAxel Dörfler{
99c6573329SAxel Dörfler	TRACE(("%p: push %Ld, %ld, %s\n", ref, offset, bytes,
100c6573329SAxel Dörfler		isWrite ? "write" : "read"));
101c6573329SAxel Dörfler
102c6573329SAxel Dörfler	int32 index = ref->last_access_index;
103c6573329SAxel Dörfler	int32 previous = index - 1;
104c6573329SAxel Dörfler	if (previous < 0)
105c6573329SAxel Dörfler		previous = LAST_ACCESSES - 1;
106c6573329SAxel Dörfler
107c6573329SAxel Dörfler	if (offset != ref->last_access[previous])
108c6573329SAxel Dörfler		ref->last_access[previous] = 0;
109c6573329SAxel Dörfler
110c6573329SAxel Dörfler	// we remember writes as negative offsets
111c6573329SAxel Dörfler	if (isWrite)
112c6573329SAxel Dörfler		ref->last_access[index] = -offset - bytes;
113c6573329SAxel Dörfler	else
114c6573329SAxel Dörfler		ref->last_access[index] = offset + bytes;
115c6573329SAxel Dörfler
116c6573329SAxel Dörfler	if (++index >= LAST_ACCESSES)
117c6573329SAxel Dörfler		index = 0;
118c6573329SAxel Dörfler	ref->last_access_index = index;
119c6573329SAxel Dörfler}
120c6573329SAxel Dörfler
121c6573329SAxel Dörfler
122c6573329SAxel Dörflerstatic void
123c6573329SAxel Dörflerreserve_pages(file_cache_ref *ref, size_t reservePages, bool isWrite)
124c6573329SAxel Dörfler{
1255c99d639SIngo Weinhold	if (low_resource_state(B_KERNEL_RESOURCE_PAGES) != B_NO_LOW_RESOURCE) {
126c6573329SAxel Dörfler		vm_cache *cache = ref->cache;
1275c99d639SIngo Weinhold		cache->Lock();
128c6573329SAxel Dörfler
129c6573329SAxel Dörfler		if (list_is_empty(&cache->consumers) && cache->areas == NULL
130c6573329SAxel Dörfler			&& access_is_sequential(ref)) {
131c6573329SAxel Dörfler			// we are not mapped, and we're accessed sequentially
132c6573329SAxel Dörfler
133c6573329SAxel Dörfler			if (isWrite) {
134c6573329SAxel Dörfler				// just schedule some pages to be written back
135e1b630c5SIngo Weinhold				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
136e1b630c5SIngo Weinhold						vm_page* page = it.Next();) {
137c6573329SAxel Dörfler					if (page->state == PAGE_STATE_MODIFIED) {
138c6573329SAxel Dörfler						// TODO: for now, we only schedule one
139c6573329SAxel Dörfler						vm_page_schedule_write_page(page);
140c6573329SAxel Dörfler						break;
141c6573329SAxel Dörfler					}
142c6573329SAxel Dörfler				}
143c6573329SAxel Dörfler			} else {
144c6573329SAxel Dörfler				// free some pages from our cache
145e1b630c5SIngo Weinhold				// TODO: start with oldest
146c6573329SAxel Dörfler				uint32 left = reservePages;
147e1b630c5SIngo Weinhold				vm_page *page;
148e1b630c5SIngo Weinhold				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
149e1b630c5SIngo Weinhold						(page = it.Next()) != NULL && left > 0;) {
150c6573329SAxel Dörfler					if (page->state != PAGE_STATE_MODIFIED
151c6573329SAxel Dörfler						&& page->state != PAGE_STATE_BUSY) {
1525c99d639SIngo Weinhold						cache->RemovePage(page);
153c6573329SAxel Dörfler						vm_page_set_state(page, PAGE_STATE_FREE);
154c6573329SAxel Dörfler						left--;
155c6573329SAxel Dörfler					}
156c6573329SAxel Dörfler				}
157c6573329SAxel Dörfler			}
158c6573329SAxel Dörfler		}
1595c99d639SIngo Weinhold		cache->Unlock();
160c6573329SAxel Dörfler	}
161c6573329SAxel Dörfler
162c6573329SAxel Dörfler	vm_page_reserve_pages(reservePages);
163c6573329SAxel Dörfler}
164c6573329SAxel Dörfler
165c6573329SAxel Dörfler
1660633dcc2SAxel Dörfler/*!	Reads the requested amount of data into the cache, and allocates
1670633dcc2SAxel Dörfler	pages needed to fulfill that request. This function is called by cache_io().
1680633dcc2SAxel Dörfler	It can only handle a certain amount of bytes, and the caller must make
169061816eeSAxel Dörfler	sure that it matches that criterion.
1700633dcc2SAxel Dörfler	The cache_ref lock must be hold when calling this function; during
1710633dcc2SAxel Dörfler	operation it will unlock the cache, though.
172061816eeSAxel Dörfler*/
1730710d59cSAxel Dörflerstatic status_t
1743d268edaSAxel Dörflerread_into_cache(file_cache_ref *ref, void *cookie, off_t offset,
175a121b8c8SAxel Dörfler	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
1763d268edaSAxel Dörfler	size_t lastReservedPages, size_t reservePages)
177f72376a8SAxel Dörfler{
178cfe386c2SAxel Dörfler	TRACE(("read_into_cache(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
179cfe386c2SAxel Dörfler		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));
180f72376a8SAxel Dörfler
18158f6e8e5SAxel Dörfler	vm_cache *cache = ref->cache;
1820f6c560eSAxel Dörfler
183279c6b76SIngo Weinhold	// TODO: We're using way too much stack! Rather allocate a sufficiently
184279c6b76SIngo Weinhold	// large chunk on the heap.
185f72376a8SAxel Dörfler	iovec vecs[MAX_IO_VECS];
186f72376a8SAxel Dörfler	int32 vecCount = 0;
187f72376a8SAxel Dörfler
188cfe386c2SAxel Dörfler	size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
1890f6c560eSAxel Dörfler	vm_page *pages[MAX_IO_VECS];
1906cef245eSIngo Weinhold	ConditionVariable busyConditions[MAX_IO_VECS];
191f72376a8SAxel Dörfler	int32 pageIndex = 0;
192f72376a8SAxel Dörfler
193f72376a8SAxel Dörfler	// allocate pages for the cache and mark them busy
19470a11cecSAxel Dörfler	for (size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
195b50494aaSAxel Dörfler		vm_page *page = pages[pageIndex++] = vm_page_allocate_page(
19620b232e9SAxel Dörfler			PAGE_STATE_FREE, true);
197139353cfSAxel Dörfler		if (page == NULL)
198139353cfSAxel Dörfler			panic("no more pages!");
199139353cfSAxel Dörfler
200279c6b76SIngo Weinhold		busyConditions[pageIndex - 1].Publish(page, "page");
201f72376a8SAxel Dörfler
2025c99d639SIngo Weinhold		cache->InsertPage(page, offset + pos);
203f72376a8SAxel Dörfler
2047f12cc54SIngo Weinhold		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
2057f12cc54SIngo Weinhold			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
206061816eeSAxel Dörfler			// TODO: check if the array is large enough (currently panics)!
207f72376a8SAxel Dörfler	}
208f72376a8SAxel Dörfler
2093d268edaSAxel Dörfler	push_access(ref, offset, bufferSize, false);
2105c99d639SIngo Weinhold	cache->Unlock();
21120b232e9SAxel Dörfler	vm_page_unreserve_pages(lastReservedPages);
212a1d09631SAxel Dörfler
213f72376a8SAxel Dörfler	// read file into reserved pages
2143d268edaSAxel Dörfler	status_t status = vfs_read_pages(ref->vnode, cookie, offset, vecs,
2157f12cc54SIngo Weinhold		vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
216f72376a8SAxel Dörfler	if (status < B_OK) {
2172b028fcaSAxel Dörfler		// reading failed, free allocated pages
2182b028fcaSAxel Dörfler
2192b028fcaSAxel Dörfler		dprintf("file_cache: read pages failed: %s\n", strerror(status));
2202b028fcaSAxel Dörfler
2215c99d639SIngo Weinhold		cache->Lock();
222b2707997SStephan Aßmus
2232b028fcaSAxel Dörfler		for (int32 i = 0; i < pageIndex; i++) {
224279c6b76SIngo Weinhold			busyConditions[i].Unpublish();
2255c99d639SIngo Weinhold			cache->RemovePage(pages[i]);
2262b028fcaSAxel Dörfler			vm_page_set_state(pages[i], PAGE_STATE_FREE);
2272b028fcaSAxel Dörfler		}
2282b028fcaSAxel Dörfler
229f72376a8SAxel Dörfler		return status;
230f72376a8SAxel Dörfler	}
231f72376a8SAxel Dörfler
232a121b8c8SAxel Dörfler	// copy the pages if needed and unmap them again
233f72376a8SAxel Dörfler
2347f12cc54SIngo Weinhold	for (int32 i = 0; i < pageIndex; i++) {
235a121b8c8SAxel Dörfler		if (useBuffer && bufferSize != 0) {
2367f12cc54SIngo Weinhold			addr_t virtualAddress;
2377f12cc54SIngo Weinhold			if (vm_get_physical_page(
2387f12cc54SIngo Weinhold					pages[i]->physical_page_number * B_PAGE_SIZE,
2391b6eff28SIngo Weinhold					&virtualAddress, 0) < B_OK) {
2407f12cc54SIngo Weinhold				panic("could not get physical page");
2417f12cc54SIngo Weinhold			}
242f72376a8SAxel Dörfler
2437f12cc54SIngo Weinhold			size_t bytes = min_c(bufferSize, (size_t)B_PAGE_SIZE - pageOffset);
2447f12cc54SIngo Weinhold
2457f12cc54SIngo Weinhold			user_memcpy((void*)buffer, (void*)(virtualAddress + pageOffset),
2467f12cc54SIngo Weinhold				bytes);
247f72376a8SAxel Dörfler			buffer += bytes;
248f72376a8SAxel Dörfler			bufferSize -= bytes;
2490f6c560eSAxel Dörfler			pageOffset = 0;
250f72376a8SAxel Dörfler
2517f12cc54SIngo Weinhold			vm_put_physical_page(virtualAddress);
252b50494aaSAxel Dörfler		}
253f72376a8SAxel Dörfler	}
254f72376a8SAxel Dörfler
255c6573329SAxel Dörfler	reserve_pages(ref, reservePages, false);
2565c99d639SIngo Weinhold	cache->Lock();
257a1d09631SAxel Dörfler
258f72376a8SAxel Dörfler	// make the pages accessible in the cache
259279c6b76SIngo Weinhold	for (int32 i = pageIndex; i-- > 0;) {
260f72376a8SAxel Dörfler		pages[i]->state = PAGE_STATE_ACTIVE;
2616d4aea47SAxel Dörfler
262279c6b76SIngo Weinhold		busyConditions[i].Unpublish();
263279c6b76SIngo Weinhold	}
264f72376a8SAxel Dörfler
265f72376a8SAxel Dörfler	return B_OK;
266f72376a8SAxel Dörfler}
267f72376a8SAxel Dörfler
268f72376a8SAxel Dörfler
269cfe386c2SAxel Dörflerstatic status_t
2703d268edaSAxel Dörflerread_from_file(file_cache_ref *ref, void *cookie, off_t offset,
271a121b8c8SAxel Dörfler	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
2723d268edaSAxel Dörfler	size_t lastReservedPages, size_t reservePages)
273cfe386c2SAxel Dörfler{
2749ff70e74SAxel Dörfler	TRACE(("read_from_file(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
2759ff70e74SAxel Dörfler		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));
2769ff70e74SAxel Dörfler
277a121b8c8SAxel Dörfler	if (!useBuffer)
278a121b8c8SAxel Dörfler		return B_OK;
279a121b8c8SAxel Dörfler
280cfe386c2SAxel Dörfler	iovec vec;
281cfe386c2SAxel Dörfler	vec.iov_base = (void *)buffer;
282cfe386c2SAxel Dörfler	vec.iov_len = bufferSize;
283cfe386c2SAxel Dörfler
2843d268edaSAxel Dörfler	push_access(ref, offset, bufferSize, false);
2855c99d639SIngo Weinhold	ref->cache->Unlock();
286cfe386c2SAxel Dörfler	vm_page_unreserve_pages(lastReservedPages);
287cfe386c2SAxel Dörfler
2889ff70e74SAxel Dörfler	status_t status = vfs_read_pages(ref->vnode, cookie, offset + pageOffset,
2897f12cc54SIngo Weinhold		&vec, 1, 0, &bufferSize);
2907f12cc54SIngo Weinhold
291cfe386c2SAxel Dörfler	if (status == B_OK)
292cfe386c2SAxel Dörfler		reserve_pages(ref, reservePages, false);
293cfe386c2SAxel Dörfler
2945c99d639SIngo Weinhold	ref->cache->Lock();
295cfe386c2SAxel Dörfler
296cfe386c2SAxel Dörfler	return status;
297cfe386c2SAxel Dörfler}
298cfe386c2SAxel Dörfler
299cfe386c2SAxel Dörfler
3000633dcc2SAxel Dörfler/*!	Like read_into_cache() but writes data into the cache.
3010633dcc2SAxel Dörfler	To preserve data consistency, it might also read pages into the cache,
3020633dcc2SAxel Dörfler	though, if only a partial page gets written.
3030633dcc2SAxel Dörfler	The same restrictions apply.
304061816eeSAxel Dörfler*/
3050710d59cSAxel Dörflerstatic status_t
3063d268edaSAxel Dörflerwrite_to_cache(file_cache_ref *ref, void *cookie, off_t offset,
307a121b8c8SAxel Dörfler	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
3083d268edaSAxel Dörfler	size_t lastReservedPages, size_t reservePages)
3090f6c560eSAxel Dörfler{
310279c6b76SIngo Weinhold	// TODO: We're using way too much stack! Rather allocate a sufficiently
311279c6b76SIngo Weinhold	// large chunk on the heap.
3120f6c560eSAxel Dörfler	iovec vecs[MAX_IO_VECS];
3130f6c560eSAxel Dörfler	int32 vecCount = 0;
314cfe386c2SAxel Dörfler	size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
3150f6c560eSAxel Dörfler	vm_page *pages[MAX_IO_VECS];
3160f6c560eSAxel Dörfler	int32 pageIndex = 0;
31799e1c71eSAxel Dörfler	status_t status = B_OK;
3186cef245eSIngo Weinhold	ConditionVariable busyConditions[MAX_IO_VECS];
31999e1c71eSAxel Dörfler
32099e1c71eSAxel Dörfler	// ToDo: this should be settable somewhere
32199e1c71eSAxel Dörfler	bool writeThrough = false;
322d7975126SAxel Dörfler
323d7975126SAxel Dörfler	// allocate pages for the cache and mark them busy
32470a11cecSAxel Dörfler	for (size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
3258e0f884cSAxel Dörfler		// TODO: if space is becoming tight, and this cache is already grown
3265913a657SAxel Dörfler		//	big - shouldn't we better steal the pages directly in that case?
3275913a657SAxel Dörfler		//	(a working set like approach for the file cache)
3288e0f884cSAxel Dörfler		// TODO: the pages we allocate here should have been reserved upfront
3298e0f884cSAxel Dörfler		//	in cache_io()
330b50494aaSAxel Dörfler		vm_page *page = pages[pageIndex++] = vm_page_allocate_page(
33120b232e9SAxel Dörfler			PAGE_STATE_FREE, true);
332279c6b76SIngo Weinhold		busyConditions[pageIndex - 1].Publish(page, "page");
333d7975126SAxel Dörfler
3345c99d639SIngo Weinhold		ref->cache->InsertPage(page, offset + pos);
335d7975126SAxel Dörfler
3367b8683b2SIngo Weinhold		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
3377b8683b2SIngo Weinhold		page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
33899e1c71eSAxel Dörfler	}
33999e1c71eSAxel Dörfler
3403d268edaSAxel Dörfler	push_access(ref, offset, bufferSize, true);
3415c99d639SIngo Weinhold	ref->cache->Unlock();
34220b232e9SAxel Dörfler	vm_page_unreserve_pages(lastReservedPages);
343d7975126SAxel Dörfler
34499e1c71eSAxel Dörfler	// copy contents (and read in partially written pages first)
34599e1c71eSAxel Dörfler
34699e1c71eSAxel Dörfler	if (pageOffset != 0) {
34799e1c71eSAxel Dörfler		// This is only a partial write, so we have to read the rest of the page
34899e1c71eSAxel Dörfler		// from the file to have consistent data in the cache
34999e1c71eSAxel Dörfler		iovec readVec = { vecs[0].iov_base, B_PAGE_SIZE };
35099e1c71eSAxel Dörfler		size_t bytesRead = B_PAGE_SIZE;
35199e1c71eSAxel Dörfler
3527b8683b2SIngo Weinhold		status = vfs_read_pages(ref->vnode, cookie, offset, &readVec, 1,
3537b8683b2SIngo Weinhold			B_PHYSICAL_IO_REQUEST, &bytesRead);
35499e1c71eSAxel Dörfler		// ToDo: handle errors for real!
35599e1c71eSAxel Dörfler		if (status < B_OK)
3563d268edaSAxel Dörfler			panic("1. vfs_read_pages() failed: %s!\n", strerror(status));
35799e1c71eSAxel Dörfler	}
35899e1c71eSAxel Dörfler
359e6b68254SAxel Dörfler	addr_t lastPageOffset = (pageOffset + bufferSize) & (B_PAGE_SIZE - 1);
36099e1c71eSAxel Dörfler	if (lastPageOffset != 0) {
36199e1c71eSAxel Dörfler		// get the last page in the I/O vectors
36299e1c71eSAxel Dörfler		addr_t last = (addr_t)vecs[vecCount - 1].iov_base
36399e1c71eSAxel Dörfler			+ vecs[vecCount - 1].iov_len - B_PAGE_SIZE;
36499e1c71eSAxel Dörfler
3655c99d639SIngo Weinhold		if (offset + pageOffset + bufferSize == ref->cache->virtual_end) {
36699e1c71eSAxel Dörfler			// the space in the page after this write action needs to be cleaned
3677b8683b2SIngo Weinhold			memset_physical(last + lastPageOffset, 0,
36858f6e8e5SAxel Dörfler				B_PAGE_SIZE - lastPageOffset);
369cfe386c2SAxel Dörfler		} else {
37099e1c71eSAxel Dörfler			// the end of this write does not happen on a page boundary, so we
37199e1c71eSAxel Dörfler			// need to fetch the last page before we can update it
37299e1c71eSAxel Dörfler			iovec readVec = { (void *)last, B_PAGE_SIZE };
373d7975126SAxel Dörfler			size_t bytesRead = B_PAGE_SIZE;
37499e1c71eSAxel Dörfler
3753d268edaSAxel Dörfler			status = vfs_read_pages(ref->vnode, cookie,
3763d268edaSAxel Dörfler				PAGE_ALIGN(offset + pageOffset + bufferSize) - B_PAGE_SIZE,
3777b8683b2SIngo Weinhold				&readVec, 1, B_PHYSICAL_IO_REQUEST, &bytesRead);
37899e1c71eSAxel Dörfler			// ToDo: handle errors for real!
37999e1c71eSAxel Dörfler			if (status < B_OK)
3803d268edaSAxel Dörfler				panic("vfs_read_pages() failed: %s!\n", strerror(status));
381cfe386c2SAxel Dörfler
382cfe386c2SAxel Dörfler			if (bytesRead < B_PAGE_SIZE) {
383cfe386c2SAxel Dörfler				// the space beyond the file size needs to be cleaned
3847b8683b2SIngo Weinhold				memset_physical(last + bytesRead, 0, B_PAGE_SIZE - bytesRead);
385cfe386c2SAxel Dörfler			}
386d7975126SAxel Dörfler		}
38799e1c71eSAxel Dörfler	}
388d7975126SAxel Dörfler
38999e1c71eSAxel Dörfler	for (int32 i = 0; i < vecCount; i++) {
39099e1c71eSAxel Dörfler		addr_t base = (addr_t)vecs[i].iov_base;
3917b8683b2SIngo Weinhold		size_t bytes = min_c(bufferSize,
3927b8683b2SIngo Weinhold			size_t(vecs[i].iov_len - pageOffset));
393257d99f2SAxel Dörfler
394a121b8c8SAxel Dörfler		if (useBuffer) {
395a121b8c8SAxel Dörfler			// copy data from user buffer
3967b8683b2SIngo Weinhold			memcpy_to_physical(base + pageOffset, (void *)buffer, bytes, true);
397a121b8c8SAxel Dörfler		} else {
398a121b8c8SAxel Dörfler			// clear buffer instead
3997b8683b2SIngo Weinhold			memset_physical(base + pageOffset, 0, bytes);
400a121b8c8SAxel Dörfler		}
4010f6c560eSAxel Dörfler
40299e1c71eSAxel Dörfler		bufferSize -= bytes;
40399e1c71eSAxel Dörfler		if (bufferSize == 0)
40499e1c71eSAxel Dörfler			break;
40599e1c71eSAxel Dörfler
40699e1c71eSAxel Dörfler		buffer += bytes;
4070f6c560eSAxel Dörfler		pageOffset = 0;
408d7975126SAxel Dörfler	}
409d7975126SAxel Dörfler
41099e1c71eSAxel Dörfler	if (writeThrough) {
41199e1c71eSAxel Dörfler		// write cached pages back to the file if we were asked to do that
4123d268edaSAxel Dörfler		status_t status = vfs_write_pages(ref->vnode, cookie, offset, vecs,
4137b8683b2SIngo Weinhold			vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
41499e1c71eSAxel Dörfler		if (status < B_OK) {
41599e1c71eSAxel Dörfler			// ToDo: remove allocated pages, ...?
41699e1c71eSAxel Dörfler			panic("file_cache: remove allocated pages! write pages failed: %s\n",
41799e1c71eSAxel Dörfler				strerror(status));
41899e1c71eSAxel Dörfler		}
419d7975126SAxel Dörfler	}
42099e1c71eSAxel Dörfler
42120b232e9SAxel Dörfler	if (status == B_OK)
422c6573329SAxel Dörfler		reserve_pages(ref, reservePages, true);
42320b232e9SAxel Dörfler
4245c99d639SIngo Weinhold	ref->cache->Lock();
425d7975126SAxel Dörfler
426d7975126SAxel Dörfler	// make the pages accessible in the cache
427257d99f2SAxel Dörfler	for (int32 i = pageIndex; i-- > 0;) {
428279c6b76SIngo Weinhold		busyConditions[i].Unpublish();
429279c6b76SIngo Weinhold
43099e1c71eSAxel Dörfler		if (writeThrough)
431257d99f2SAxel Dörfler			pages[i]->state = PAGE_STATE_ACTIVE;
43299e1c71eSAxel Dörfler		else
43399e1c71eSAxel Dörfler			vm_page_set_state(pages[i], PAGE_STATE_MODIFIED);
434257d99f2SAxel Dörfler	}
435d7975126SAxel Dörfler
43699e1c71eSAxel Dörfler	return status;
437d7975126SAxel Dörfler}
438d7975126SAxel Dörfler
439d7975126SAxel Dörfler
4400f6c560eSAxel Dörflerstatic status_t
4413d268edaSAxel Dörflerwrite_to_file(file_cache_ref *ref, void *cookie, off_t offset, int32 pageOffset,
442a121b8c8SAxel Dörfler	addr_t buffer, size_t bufferSize, bool useBuffer, size_t lastReservedPages,
443cfe386c2SAxel Dörfler	size_t reservePages)
44409149281SAxel Dörfler{
4457f12cc54SIngo Weinhold	size_t chunkSize = 0;
446a121b8c8SAxel Dörfler	if (!useBuffer) {
447a121b8c8SAxel Dörfler		// we need to allocate a zero buffer
448a121b8c8SAxel Dörfler		// TODO: use smaller buffers if this fails
449a121b8c8SAxel Dörfler		chunkSize = min_c(bufferSize, B_PAGE_SIZE);
450a121b8c8SAxel Dörfler		buffer = (addr_t)malloc(chunkSize);
451a121b8c8SAxel Dörfler		if (buffer == 0)
452a121b8c8SAxel Dörfler			return B_NO_MEMORY;
453a121b8c8SAxel Dörfler
454a121b8c8SAxel Dörfler		memset((void *)buffer, 0, chunkSize);
455a121b8c8SAxel Dörfler	}
456a121b8c8SAxel Dörfler
45709149281SAxel Dörfler	iovec vec;
45809149281SAxel Dörfler	vec.iov_base = (void *)buffer;
45909149281SAxel Dörfler	vec.iov_len = bufferSize;
46009149281SAxel Dörfler
4613d268edaSAxel Dörfler	push_access(ref, offset, bufferSize, true);
4625c99d639SIngo Weinhold	ref->cache->Unlock();
46309149281SAxel Dörfler	vm_page_unreserve_pages(lastReservedPages);
46409149281SAxel Dörfler
465c12ed856SAxel Dörfler	status_t status = B_OK;
466a121b8c8SAxel Dörfler
467a121b8c8SAxel Dörfler	if (!useBuffer) {
468a121b8c8SAxel Dörfler		while (bufferSize > 0) {
469a121b8c8SAxel Dörfler			if (bufferSize < chunkSize)
470a121b8c8SAxel Dörfler				chunkSize = bufferSize;
471a121b8c8SAxel Dörfler
472a121b8c8SAxel Dörfler			status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
4737f12cc54SIngo Weinhold				&vec, 1, 0, &chunkSize);
474a121b8c8SAxel Dörfler			if (status < B_OK)
475a121b8c8SAxel Dörfler				break;
476a121b8c8SAxel Dörfler
477a121b8c8SAxel Dörfler			bufferSize -= chunkSize;
478a121b8c8SAxel Dörfler			pageOffset += chunkSize;
479a121b8c8SAxel Dörfler		}
4809e1ea0e7SIngo Weinhold
4819e1ea0e7SIngo Weinhold		free((void*)buffer);
482a121b8c8SAxel Dörfler	} else {
483a121b8c8SAxel Dörfler		status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
4847f12cc54SIngo Weinhold			&vec, 1, 0, &bufferSize);
485a121b8c8SAxel Dörfler	}
486a121b8c8SAxel Dörfler
48709149281SAxel Dörfler	if (status == B_OK)
48809149281SAxel Dörfler		reserve_pages(ref, reservePages, true);
48909149281SAxel Dörfler
4905c99d639SIngo Weinhold	ref->cache->Lock();
49109149281SAxel Dörfler
49209149281SAxel Dörfler	return status;
49309149281SAxel Dörfler}
49409149281SAxel Dörfler
49509149281SAxel Dörfler
49609149281SAxel Dörflerstatic inline status_t
4973d268edaSAxel Dörflersatisfy_cache_io(file_cache_ref *ref, void *cookie, cache_func function,
498a121b8c8SAxel Dörfler	off_t offset, addr_t buffer, bool useBuffer, int32 &pageOffset,
499a121b8c8SAxel Dörfler	size_t bytesLeft, size_t &reservePages, off_t &lastOffset,
500a121b8c8SAxel Dörfler	addr_t &lastBuffer, int32 &lastPageOffset, size_t &lastLeft,
501a121b8c8SAxel Dörfler	size_t &lastReservedPages)
5020f6c560eSAxel Dörfler{
5030633dcc2SAxel Dörfler	if (lastBuffer == buffer)
5040633dcc2SAxel Dörfler		return B_OK;
5050f6c560eSAxel Dörfler
5060633dcc2SAxel Dörfler	size_t requestSize = buffer - lastBuffer;
5079edc2b52SAxel Dörfler	reservePages = min_c(MAX_IO_VECS, (lastLeft - requestSize
5089edc2b52SAxel Dörfler		+ lastPageOffset + B_PAGE_SIZE - 1) >> PAGE_SHIFT);
50920b232e9SAxel Dörfler
5103d268edaSAxel Dörfler	status_t status = function(ref, cookie, lastOffset, lastPageOffset,