/*
 * Copyright 2008-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Distributed under the terms of the MIT License.
 */

// included by vfs.cpp


//#define TRACE_VFS_REQUEST_IO
#ifdef TRACE_VFS_REQUEST_IO
#	define TRACE_RIO(x...) dprintf(x)
#else
#	define TRACE_RIO(x...) do {} while (false)
#endif


#include <heap.h>
#include <AutoDeleterDrivers.h>


// #pragma mark - AsyncIOCallback


AsyncIOCallback::~AsyncIOCallback()
{
}


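/*!	Adapter between the io_request finished callback and AsyncIOCallback:
	\a data is expected to be the AsyncIOCallback, whose IOFinished() is
	invoked with the notification.
*/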
/* static */ status_t
AsyncIOCallback::IORequestCallback(void* data, io_request* request,
	status_t status, bool partialTransfer, generic_size_t transferEndOffset)
{
	((AsyncIOCallback*)data)->IOFinished(status, partialTransfer,
		transferEndOffset);
	return B_OK;
}


// #pragma mark - StackableAsyncIOCallback


StackableAsyncIOCallback::StackableAsyncIOCallback(AsyncIOCallback* next)
	:
	fNextCallback(next)
{
}


// #pragma mark -


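/*!	Per-request state for do_iterative_fd_io(). It is passed as cookie to the
	iteration and finished callbacks installed on the io_request, and also
	remembers the finished callback that was installed before, so that it can
	be chained.
*/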
struct iterative_io_cookie {
	struct vnode*					vnode;
	file_descriptor*				descriptor;
	iterative_io_get_vecs			get_vecs;
	iterative_io_finished			finished;
	void*							cookie;
	off_t							request_offset;
	io_request_finished_callback	next_finished_callback;
	void*							next_finished_cookie;
};


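/*!	Interface for the actual data transfer in synchronous_io(): subclasses
	decide how a chunk at a given offset is read or written. On return,
	\a length is updated to the number of bytes actually transferred.
*/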
class DoIO {
public:
	DoIO(bool write)
		:
		fWrite(write)
	{
	}

	virtual	~DoIO()
	{
	}

	virtual status_t IO(off_t offset, void* buffer, size_t* length) = 0;

protected:
	bool	fWrite;
};


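/*!	DoIO implementation that simply delegates to a C callback, as used by
	vfs_synchronous_io().
*/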
class CallbackIO : public DoIO {
public:
	CallbackIO(bool write,
			status_t (*doIO)(void* cookie, off_t offset, void* buffer,
				size_t* length),
			void* cookie)
		:
		DoIO(write),
		fDoIO(doIO),
		fCookie(cookie)
	{
	}

	virtual status_t IO(off_t offset, void* buffer, size_t* length)
	{
		return fDoIO(fCookie, offset, buffer, length);
	}

private:
	status_t (*fDoIO)(void*, off_t, void*, size_t*);
	void*		fCookie;
};


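/*!	DoIO implementation that transfers the data via the vnode's read_pages()
	respectively write_pages() hook.
*/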
class VnodeIO : public DoIO {
public:
	VnodeIO(bool write, struct vnode* vnode, void* cookie)
		:
		DoIO(write),
		fVnode(vnode),
		fCookie(cookie)
	{
	}

	virtual status_t IO(off_t offset, void* buffer, size_t* length)
	{
		iovec vec;
		vec.iov_base = buffer;
		vec.iov_len = *length;

		if (fWrite) {
			return FS_CALL(fVnode, write_pages, fCookie, offset, &vec, 1,
				length);
		}

		return FS_CALL(fVnode, read_pages, fCookie, offset, &vec, 1, length);
	}

private:
	struct vnode*	fVnode;
	void*			fCookie;
};


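/*!	Iteration callback installed on the request by do_iterative_fd_io().
	Each invocation asks the get_vecs() hook for the next batch of file vecs,
	creates up to kMaxSubRequests subrequests from them, and schedules those
	via vfs_vnode_io(). Sparse file vecs (offset -1) are zero-filled directly;
	an empty vec batch ends the request as a partial transfer.
*/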
static status_t
do_iterative_fd_io_iterate(void* _cookie, io_request* request,
	bool* _partialTransfer)
{
	TRACE_RIO("[%" B_PRId32 "] do_iterative_fd_io_iterate(request: %p)\n",
		find_thread(NULL), request);

	static const size_t kMaxSubRequests = 8;

	iterative_io_cookie* cookie = (iterative_io_cookie*)_cookie;

	request->DeleteSubRequests();

	off_t requestOffset = cookie->request_offset;
	size_t requestLength = request->Length()
		- (requestOffset - request->Offset());

	// get the next file vecs
	file_io_vec vecs[kMaxSubRequests];
	size_t vecCount = kMaxSubRequests;
	status_t error = cookie->get_vecs(cookie->cookie, request, requestOffset,
		requestLength, vecs, &vecCount);
	if (error != B_OK && error != B_BUFFER_OVERFLOW)
		return error;
	if (vecCount == 0) {
		*_partialTransfer = true;
		return B_OK;
	}
	TRACE_RIO("[%" B_PRId32 "]  got %zu file vecs\n", find_thread(NULL),
		vecCount);

	// Reset the error code for the loop below.
	error = B_OK;

	// create subrequests for the file vecs we've got
	size_t subRequestCount = 0;
	for (size_t i = 0;
		i < vecCount && subRequestCount < kMaxSubRequests && error == B_OK;
		i++) {
		off_t vecOffset = vecs[i].offset;
		off_t vecLength = min_c(vecs[i].length, (off_t)requestLength);
		TRACE_RIO("[%" B_PRId32 "]    vec %zu offset: %" B_PRIdOFF
			", length: %" B_PRIdOFF "\n", find_thread(NULL), i, vecOffset,
			vecLength);

		// The special offset -1 means that this vec is part of a sparse file
		// hole, which reads as zeroes. We fill it in right here.
		if (vecOffset == -1) {
			if (request->IsWrite()) {
				panic("do_iterative_fd_io_iterate(): write to sparse file "
					"vector");
				error = B_BAD_VALUE;
				break;
			}

			error = request->ClearData(requestOffset, vecLength);
			if (error != B_OK)
				break;

			requestOffset += vecLength;
			requestLength -= vecLength;
			continue;
		}

		while (vecLength > 0 && subRequestCount < kMaxSubRequests) {
			TRACE_RIO("[%" B_PRId32 "]    creating subrequest: offset: %"
				B_PRIdOFF ", length: %" B_PRIdOFF "\n", find_thread(NULL),
				vecOffset, vecLength);
			IORequest* subRequest;
			error = request->CreateSubRequest(requestOffset, vecOffset,
				vecLength, subRequest);
			if (error != B_OK)
				break;

			subRequestCount++;

			size_t lengthProcessed = subRequest->Length();
			vecOffset += lengthProcessed;
			vecLength -= lengthProcessed;
			requestOffset += lengthProcessed;
			requestLength -= lengthProcessed;
		}
	}

	// We fail only if we couldn't create any subrequests at all.
	if (error != B_OK && subRequestCount == 0)
		return error;

	// Reset the error code for the loop below.
	error = B_OK;

	request->Advance(requestOffset - cookie->request_offset);
	cookie->request_offset = requestOffset;

	// If we don't have any subrequests at this point, all that remained were
	// zeroed sparse file vectors, so the request is done now.
	if (subRequestCount == 0) {
		ASSERT(request->RemainingBytes() == 0);
		request->SetStatusAndNotify(B_OK);
		return B_OK;
	}

	// Schedule the subrequests.
	IORequest* nextSubRequest = request->FirstSubRequest();
	while (nextSubRequest != NULL) {
		IORequest* subRequest = nextSubRequest;
		nextSubRequest = request->NextSubRequest(subRequest);

		if (error == B_OK) {
			TRACE_RIO("[%" B_PRId32 "]  scheduling subrequest: %p\n",
				find_thread(NULL), subRequest);
			error = vfs_vnode_io(cookie->vnode, cookie->descriptor->cookie,
				subRequest);
		} else {
			// Once scheduling a subrequest has failed, we cancel all
			// subsequent subrequests.
			subRequest->SetStatusAndNotify(B_CANCELED);
		}
	}

	// TODO: Cancel the subrequests that were scheduled successfully.

	return B_OK;
}


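/*!	Finished callback installed on the request by do_iterative_fd_io().
	Notifies the caller's finished() hook, puts the file descriptor reference
	held for the iteration, chains to the previously installed finished
	callback, and disposes of the cookie.
*/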
static status_t
do_iterative_fd_io_finish(void* _cookie, io_request* request, status_t status,
	bool partialTransfer, generic_size_t transferEndOffset)
{
	iterative_io_cookie* cookie = (iterative_io_cookie*)_cookie;

	if (cookie->finished != NULL) {
		cookie->finished(cookie->cookie, request, status, partialTransfer,
			transferEndOffset);
	}

	put_fd(cookie->descriptor);

	if (cookie->next_finished_callback != NULL) {
		cookie->next_finished_callback(cookie->next_finished_cookie, request,
			status, partialTransfer, transferEndOffset);
	}

	delete cookie;

	return B_OK;
}


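/*!	Fallback used by do_iterative_fd_io() when the vnode doesn't implement
	the io() hook (or when allocating the iteration cookie fails): the whole
	request is processed synchronously via the vnode's read_pages() and
	write_pages() hooks, with getVecs() translating request offsets to file
	vecs along the way.
*/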
static status_t
do_synchronous_iterative_vnode_io(struct vnode* vnode, void* openCookie,
	io_request* request, iterative_io_get_vecs getVecs,
	iterative_io_finished finished, void* cookie)
{
	IOBuffer* buffer = request->Buffer();
	VnodeIO io(request->IsWrite(), vnode, openCookie);

	iovec vector;
	void* virtualVecCookie = NULL;
	off_t offset = request->Offset();
	generic_size_t length = request->Length();

	status_t error = B_OK;

	while (error == B_OK && length > 0
		&& buffer->GetNextVirtualVec(virtualVecCookie, vector) == B_OK) {
		uint8* vecBase = (uint8*)vector.iov_base;
		generic_size_t vecLength = min_c(vector.iov_len, length);

		while (error == B_OK && vecLength > 0) {
			file_io_vec fileVecs[8];
			size_t fileVecCount = 8;
			error = getVecs(cookie, request, offset, vecLength, fileVecs,
				&fileVecCount);
			if (error != B_OK || fileVecCount == 0)
				break;

			for (size_t i = 0; i < fileVecCount; i++) {
				const file_io_vec& fileVec = fileVecs[i];
				size_t toTransfer = min_c(fileVec.length, (off_t)length);
				size_t transferred = toTransfer;
				error = io.IO(fileVec.offset, vecBase, &transferred);
				if (error != B_OK)
					break;

				offset += transferred;
				length -= transferred;
				vecBase += transferred;
				vecLength -= transferred;

				if (transferred != toTransfer)
					break;
			}
		}
	}

	buffer->FreeVirtualVecCookie(virtualVecCookie);

	bool partial = length > 0;
	size_t bytesTransferred = request->Length() - length;
	request->SetTransferredBytes(partial, bytesTransferred);
	finished(cookie, request, error, partial, bytesTransferred);
	request->SetStatusAndNotify(error);
	return error;
}


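/*!	Synchronously processes \a request: iterates over the request's buffer
	vecs and transfers each of them through the given DoIO. The request is
	notified when done; a short transfer ends the loop and is reported via
	SetTransferredBytes().
*/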
static status_t
synchronous_io(io_request* request, DoIO& io)
{
	TRACE_RIO("[%" B_PRId32 "] synchronous_io(request: %p (offset: %" B_PRIdOFF
		", length: %" B_PRIuGENADDR "))\n", find_thread(NULL), request,
		request->Offset(), request->Length());

	IOBuffer* buffer = request->Buffer();

	iovec vector;
	void* virtualVecCookie = NULL;
	off_t offset = request->Offset();
	generic_size_t length = request->Length();

	while (length > 0
		&& buffer->GetNextVirtualVec(virtualVecCookie, vector) == B_OK) {
		void* vecBase = (void*)(addr_t)vector.iov_base;
		size_t vecLength = min_c(vector.iov_len, length);

		TRACE_RIO("[%" B_PRId32 "]   I/O: offset: %" B_PRIdOFF ", vecBase: "
			"%p, length: %zu\n", find_thread(NULL), offset, vecBase,
			vecLength);

		size_t transferred = vecLength;
		status_t error = io.IO(offset, vecBase, &transferred);
		if (error != B_OK) {
			TRACE_RIO("[%" B_PRId32 "]   I/O failed: %#" B_PRIx32 "\n",
				find_thread(NULL), error);
			buffer->FreeVirtualVecCookie(virtualVecCookie);
			request->SetStatusAndNotify(error);
			return error;
		}

		offset += transferred;
		length -= transferred;

		if (transferred != vecLength)
			break;
	}

	TRACE_RIO("[%" B_PRId32 "] synchronous_io() succeeded\n",
		find_thread(NULL));

	buffer->FreeVirtualVecCookie(virtualVecCookie);
	request->SetTransferredBytes(length > 0, request->Length() - length);
	request->SetStatusAndNotify(B_OK);
	return B_OK;
}


// #pragma mark - kernel private API


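/*!	Performs the I/O for \a request on \a vnode. If the vnode implements the
	io() hook, the request is handed to it; otherwise, or if the hook returns
	B_UNSUPPORTED, the request is processed synchronously via the vnode's
	read_pages()/write_pages() hooks.
*/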
status_t
vfs_vnode_io(struct vnode* vnode, void* cookie, io_request* request)
{
	status_t result = B_ERROR;
	if (!HAS_FS_CALL(vnode, io)
		|| (result = FS_CALL(vnode, io, cookie, request)) == B_UNSUPPORTED) {
		// no io() call -- fall back to synchronous I/O
		VnodeIO io(request->IsWrite(), vnode, cookie);
		return synchronous_io(request, io);
	}

	return result;
}


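/*!	Processes \a request synchronously, invoking the given \a doIO callback
	for each chunk to be transferred.
*/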
status_t
vfs_synchronous_io(io_request* request,
	status_t (*doIO)(void* cookie, off_t offset, void* buffer, size_t* length),
	void* cookie)
{
	CallbackIO io(request->IsWrite(), doIO, cookie);
	return synchronous_io(request, io);
}


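/*!	Creates an asynchronous read request for the given vecs and hands it to
	vfs_vnode_io(). The \a callback is notified when the request is finished,
	including the early error cases; the request itself is flagged with
	B_DELETE_IO_REQUEST, i.e. it is deleted once completed.
*/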
status_t
vfs_asynchronous_read_pages(struct vnode* vnode, void* cookie, off_t pos,
	const generic_io_vec* vecs, size_t count, generic_size_t numBytes,
	uint32 flags, AsyncIOCallback* callback)
{
	IORequest* request = IORequest::Create((flags & B_VIP_IO_REQUEST) != 0);
	if (request == NULL) {
		callback->IOFinished(B_NO_MEMORY, true, 0);
		return B_NO_MEMORY;
	}

	status_t status = request->Init(pos, vecs, count, numBytes, false,
		flags | B_DELETE_IO_REQUEST);
	if (status != B_OK) {
		delete request;
		callback->IOFinished(status, true, 0);
		return status;
	}

	request->SetFinishedCallback(&AsyncIOCallback::IORequestCallback,
		callback);

	return vfs_vnode_io(vnode, cookie, request);
}


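/*!	Write counterpart of vfs_asynchronous_read_pages(); identical except for
	the write flag passed to IORequest::Init().
*/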
status_t
vfs_asynchronous_write_pages(struct vnode* vnode, void* cookie, off_t pos,
	const generic_io_vec* vecs, size_t count, generic_size_t numBytes,
	uint32 flags, AsyncIOCallback* callback)
{
	IORequest* request = IORequest::Create((flags & B_VIP_IO_REQUEST) != 0);
	if (request == NULL) {
		callback->IOFinished(B_NO_MEMORY, true, 0);
		return B_NO_MEMORY;
	}

	status_t status = request->Init(pos, vecs, count, numBytes, true,
		flags | B_DELETE_IO_REQUEST);
	if (status != B_OK) {
		delete request;
		callback->IOFinished(status, true, 0);
		return status;
	}

	request->SetFinishedCallback(&AsyncIOCallback::IORequestCallback,
		callback);

	return vfs_vnode_io(vnode, cookie, request);
}


// #pragma mark - public API


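/*!	Performs \a request on the file associated with \a fd, delegating to
	vfs_vnode_io() for the actual transfer.
*/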
status_t
do_fd_io(int fd, io_request* request)
{
	struct vnode* vnode;
	file_descriptor* descriptor = get_fd_and_vnode(fd, &vnode, true);
	if (descriptor == NULL) {
		request->SetStatusAndNotify(B_FILE_ERROR);
		return B_FILE_ERROR;
	}

	FileDescriptorPutter descriptorPutter(descriptor);

	return vfs_vnode_io(vnode, descriptor->cookie, request);
}


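/*!	Performs \a request on the file associated with \a fd iteratively: the
	\a getVecs hook translates (offset, length) ranges of the request into
	file vecs, for which subrequests are created and scheduled; \a finished
	is invoked once the entire request is done. If the vnode lacks an io()
	hook, or if the iteration cookie cannot be allocated, the request falls
	back to synchronous processing.

	A minimal caller sketch (the hooks my_get_vecs()/my_finished() are
	hypothetical; the exact hook typedefs live in vfs.h):

		static status_t
		my_get_vecs(void* cookie, io_request* request, off_t offset,
			size_t size, file_io_vec* vecs, size_t* _count)
		{
			// Illustration only: identity mapping between file and device
			// offsets. A real FS would consult its block map here.
			vecs[0].offset = offset;
			vecs[0].length = size;
			*_count = 1;
			return B_OK;
		}

		static status_t
		my_finished(void* cookie, io_request* request, status_t status,
			bool partialTransfer, size_t bytesTransferred)
		{
			// Release any per-request state held in cookie.
			return B_OK;
		}

		status_t error = do_iterative_fd_io(fd, request, my_get_vecs,
			my_finished, myCookie);
*/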
status_t
do_iterative_fd_io(int fd, io_request* request, iterative_io_get_vecs getVecs,
	iterative_io_finished finished, void* cookie)
{
	TRACE_RIO("[%" B_PRId32 "] do_iterative_fd_io(fd: %d, request: %p "
		"(offset: %" B_PRIdOFF ", length: %" B_PRIuGENADDR "))\n",
		find_thread(NULL), fd, request, request->Offset(), request->Length());

	struct vnode* vnode;
	file_descriptor* descriptor = get_fd_and_vnode(fd, &vnode, true);
	if (descriptor == NULL) {
		finished(cookie, request, B_FILE_ERROR, true, 0);
		request->SetStatusAndNotify(B_FILE_ERROR);
		return B_FILE_ERROR;
	}

	FileDescriptorPutter descriptorPutter(descriptor);

	if (!HAS_FS_CALL(vnode, io)) {
		// no io() call -- fall back to synchronous I/O
		return do_synchronous_iterative_vnode_io(vnode, descriptor->cookie,
			request, getVecs, finished, cookie);
	}

	iterative_io_cookie* iterationCookie
		= (request->Flags() & B_VIP_IO_REQUEST) != 0
			? new(malloc_flags(HEAP_PRIORITY_VIP)) iterative_io_cookie
			: new(std::nothrow) iterative_io_cookie;
	if (iterationCookie == NULL) {
		// no memory -- fall back to synchronous I/O
		return do_synchronous_iterative_vnode_io(vnode, descriptor->cookie,
			request, getVecs, finished, cookie);
	}

	iterationCookie->vnode = vnode;
	iterationCookie->descriptor = descriptor;
	iterationCookie->get_vecs = getVecs;
	iterationCookie->finished = finished;
	iterationCookie->cookie = cookie;
	iterationCookie->request_offset = request->Offset();
	iterationCookie->next_finished_callback = request->FinishedCallback(
		&iterationCookie->next_finished_cookie);

	request->SetFinishedCallback(&do_iterative_fd_io_finish, iterationCookie);
	request->SetIterationCallback(&do_iterative_fd_io_iterate, iterationCookie);

	descriptorPutter.Detach();
		// From now on the descriptor is put by our finish callback.

	bool partialTransfer = false;
	status_t error = do_iterative_fd_io_iterate(iterationCookie, request,
		&partialTransfer);
	if (error != B_OK || partialTransfer) {
		if (partialTransfer) {
			request->SetTransferredBytes(partialTransfer,
				request->TransferredBytes());
		}

		request->SetStatusAndNotify(error);
		return error;
	}

	return B_OK;
}