1/*
 * Copyright 2004-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7#include <OS.h>
8#include <fs_interface.h>
9
10#include <stdarg.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
14#include <sys/uio.h>
15
// Enables the additional dprintf() dumps that are guarded by
// "#ifdef TRACE_FILE_CACHE" further down in this file.
#define TRACE_FILE_CACHE
// TRACE() pastes its argument directly behind "printf", which is why every
// call site wraps the argument list in a second pair of parentheses.
#define TRACE(x) printf x
// route dprintf() output through printf()
#define dprintf printf

// Fallback used when no ASSERT is defined yet: the condition is dropped and
// only an empty statement remains.
#ifndef ASSERT
#	define ASSERT(x) ;
#endif

// maximum number of iovecs per request
#define MAX_IO_VECS			64	// 256 kB
// size of the file_io_vec array pages_io() hands to get_file_map()
#define MAX_FILE_IO_VECS	4
// size of the temporary iovec array pages_io() builds per device call
#define MAX_TEMP_IO_VECS	8

// number of extents a file_map can hold without a heap allocation
#define CACHED_FILE_EXTENTS	2
	// must be smaller than MAX_FILE_IO_VECS
	// ToDo: find out how many of these are typically used
32
33struct vm_cache_ref;
34
// One contiguous piece of a file: where it starts within the file, and the
// run on disk that backs it.
struct file_extent {
	off_t			offset;
		// start of the extent relative to the beginning of the file;
		// file_map::Add() sets it to the sum of all preceding lengths
	file_io_vec		disk;
		// offset and length of the extent on disk
};
39
// Ordered list of the extents of a file. Up to CACHED_FILE_EXTENTS extents
// are kept inline in "direct"; larger maps live in the heap buffer "array".
// Both share the same storage, and "count" decides which one is valid.
struct file_map {
	file_map();
	~file_map();

	// Both accessors return NULL when \a index is not below "count".
	file_extent *operator[](uint32 index);
	file_extent *ExtentAt(uint32 index);
	// Appends the given vecs and reports the file offset following the last
	// extent in \a lastOffset.
	status_t Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset);
	// Empties the map and releases the heap buffer, if any.
	void Free();

	union {
		file_extent	direct[CACHED_FILE_EXTENTS];
			// valid while count <= CACHED_FILE_EXTENTS
		file_extent	*array;
			// valid while count > CACHED_FILE_EXTENTS; allocated with
			// realloc() and released with free()
	};
	size_t			count;
		// number of extents currently stored
};
55
// Per-file state that is passed to pages_io() and get_file_map().
struct file_cache_ref {
	vm_cache_ref	*cache;
		// only referenced by the commented-out locking calls in this file
	void			*vnode;
		// handed to vfs_get_file_map() when the file map is built
	void			*device;
	void			*cookie;
		// device and cookie are passed through to vfs_read_pages() and
		// vfs_write_pages()
	file_map		map;
		// cached extents of the file, filled on demand by get_file_map()
};
63
64
// capacity of the simulated on-disk layout below
const uint32 kMaxFileVecs = 1024;

// Simulated on-disk layout of the test file; written by set_file_map() and
// read by find_map_base() and vfs_get_file_map().
file_io_vec gFileVecs[kMaxFileVecs];
// number of valid entries in gFileVecs
size_t gFileVecCount;
// sum of the lengths of all valid entries in gFileVecs
off_t gFileSize;
70
71
72file_map::file_map()
73{
74	array = NULL;
75	count = 0;
76}
77
78
79file_map::~file_map()
80{
81	Free();
82}
83
84
85file_extent *
86file_map::operator[](uint32 index)
87{
88	return ExtentAt(index);
89}
90
91
92file_extent *
93file_map::ExtentAt(uint32 index)
94{
95	if (index >= count)
96		return NULL;
97
98	if (count > CACHED_FILE_EXTENTS)
99		return &array[index];
100
101	return &direct[index];
102}
103
104
105status_t
106file_map::Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset)
107{
108	TRACE(("file_map::Add(vecCount = %ld)\n", vecCount));
109
110	off_t offset = 0;
111
112	if (vecCount <= CACHED_FILE_EXTENTS && count == 0) {
113		// just use the reserved area in the file_cache_ref structure
114	} else {
115		// TODO: once we can invalidate only parts of the file map,
116		//	we might need to copy the previously cached file extends
117		//	from the direct range
118		file_extent *newMap = (file_extent *)realloc(array,
119			(count + vecCount) * sizeof(file_extent));
120		if (newMap == NULL)
121			return B_NO_MEMORY;
122
123		array = newMap;
124
125		if (count != 0) {
126			file_extent *extent = ExtentAt(count - 1);
127			offset = extent->offset + extent->disk.length;
128		}
129	}
130
131	int32 start = count;
132	count += vecCount;
133
134	for (uint32 i = 0; i < vecCount; i++) {
135		file_extent *extent = ExtentAt(start + i);
136
137		extent->offset = offset;
138		extent->disk = vecs[i];
139
140		offset += extent->disk.length;
141	}
142
143#ifdef TRACE_FILE_CACHE
144	for (uint32 i = 0; i < count; i++) {
145		file_extent *extent = ExtentAt(i);
146		dprintf("  [%ld] extend offset %lld, disk offset %lld, length %lld\n",
147			i, extent->offset, extent->disk.offset, extent->disk.length);
148	}
149#endif
150
151	lastOffset = offset;
152	return B_OK;
153}
154
155
156void
157file_map::Free()
158{
159	if (count > CACHED_FILE_EXTENTS)
160		free(array);
161
162	array = NULL;
163	count = 0;
164}
165
166
167//	#pragma mark -
168
169
170void
171set_vecs(iovec *vecs, size_t *_count, ...)
172{
173	uint32 base = 0;
174	size_t count = 0;
175
176	va_list args;
177	va_start(args, _count);
178
179	while (count < MAX_IO_VECS) {
180		int32 length = va_arg(args, int32);
181		if (length < 0)
182			break;
183
184		vecs[count].iov_base = (void *)base;
185		vecs[count].iov_len = length;
186
187		base += length;
188		count++;
189	}
190
191	va_end(args);
192	*_count = count;
193}
194
195
196void
197set_file_map(int32 base, int32 length, ...)
198{
199	gFileVecs[0].offset = base;
200	gFileVecs[0].length = length;
201
202	gFileSize = length;
203	gFileVecCount = 1;
204
205	va_list args;
206	va_start(args, length);
207
208	while (gFileVecCount < kMaxFileVecs) {
209		off_t offset = va_arg(args, int32);
210		if (offset < 0)
211			break;
212
213		length = va_arg(args, int32);
214
215		gFileVecs[gFileVecCount].offset = offset;
216		gFileVecs[gFileVecCount].length = length;
217
218		gFileSize += length;
219		gFileVecCount++;
220	}
221
222	va_end(args);
223}
224
225
226status_t
227find_map_base(off_t offset, off_t &diskOffset, off_t &diskLength,
228	off_t &fileOffset)
229{
230	fileOffset = 0;
231
232	for (uint32 i = 0; i < gFileVecCount; i++) {
233		if (offset < gFileVecs[i].length) {
234			diskOffset = gFileVecs[i].offset;
235			diskLength = gFileVecs[i].length;
236			return B_OK;
237		}
238
239		fileOffset += gFileVecs[i].length;
240		offset -= gFileVecs[i].length;
241	}
242
243	return B_ENTRY_NOT_FOUND;
244}
245
246
247//	#pragma mark - VFS functions
248
249
250static status_t
251vfs_get_file_map(void *vnode, off_t offset, size_t size, file_io_vec *vecs,
252	size_t *_count)
253{
254	off_t diskOffset, diskLength, fileOffset;
255	size_t max = *_count;
256	uint32 index = 0;
257
258	printf("vfs_get_file_map(offset = %lld, size = %lu, count = %lu)\n",
259		offset, size, *_count);
260
261	while (true) {
262		status_t status = find_map_base(offset, diskOffset, diskLength, fileOffset);
263		//status_t status = inode->FindBlockRun(offset, run, fileOffset);
264		if (status != B_OK)
265			return status;
266
267		vecs[index].offset = diskOffset + offset - fileOffset;
268		vecs[index].length = diskLength - offset + fileOffset;
269		offset += vecs[index].length;
270
271		// are we already done?
272		if (size <= vecs[index].length
273			|| offset >= gFileSize) {
274			if (offset > gFileSize) {
275				// make sure the extent ends with the last official file
276				// block (without taking any preallocations into account)
277				vecs[index].length = gFileSize - fileOffset;
278			}
279			*_count = index + 1;
280			return B_OK;
281		}
282
283		size -= vecs[index].length;
284		index++;
285
286		if (index >= max) {
287			// we're out of file_io_vecs; let's bail out
288			*_count = index;
289			return B_BUFFER_OVERFLOW;
290		}
291	}
292}
293
294
295static status_t
296vfs_read_pages(void *device, void *cookie, off_t offset,
297	const iovec *vecs, size_t count, size_t *bytes, bool kernel)
298{
299	printf("read offset %lld, length %lu\n", offset, *bytes);
300	for (uint32 i = 0; i < count; i++) {
301		printf("  [%lu] base %lu, length %lu\n",
302			i, (uint32)vecs[i].iov_base, vecs[i].iov_len);
303	}
304	return B_OK;
305}
306
307
308static status_t
309vfs_write_pages(void *device, void *cookie, off_t offset,
310	const iovec *vecs, size_t count, size_t *bytes, bool kernel)
311{
312	printf("write offset %lld, length %lu\n", offset, *bytes);
313	for (uint32 i = 0; i < count; i++) {
314		printf("  [%lu] base %lu, length %lu\n",
315			i, (uint32)vecs[i].iov_base, vecs[i].iov_len);
316	}
317	return B_OK;
318}
319
320
321//	#pragma mark - file_cache.cpp copies
322
323
324static file_extent *
325find_file_extent(file_cache_ref *ref, off_t offset, uint32 *_index)
326{
327	// TODO: do binary search
328
329	for (uint32 index = 0; index < ref->map.count; index++) {
330		file_extent *extent = ref->map[index];
331
332		if (extent->offset <= offset
333			&& extent->offset + extent->disk.length > offset) {
334			if (_index)
335				*_index = index;
336			return extent;
337		}
338	}
339
340	return NULL;
341}
342
343
344static status_t
345get_file_map(file_cache_ref *ref, off_t offset, size_t size,
346	file_io_vec *vecs, size_t *_count)
347{
348	size_t maxVecs = *_count;
349	status_t status = B_OK;
350
351	if (ref->map.count == 0) {
352		// we don't yet have the map of this file, so let's grab it
353		// (ordered by offset, so that we can do a binary search on them)
354
355		//mutex_lock(&ref->cache->lock);
356
357		// the file map could have been requested in the mean time
358		if (ref->map.count == 0) {
359			size_t vecCount = maxVecs;
360			off_t mapOffset = 0;
361
362			while (true) {
363				status = vfs_get_file_map(ref->vnode, mapOffset, ~0UL, vecs, &vecCount);
364				if (status < B_OK && status != B_BUFFER_OVERFLOW) {
365					//mutex_unlock(&ref->cache->lock);
366					return status;
367				}
368
369				status_t addStatus = ref->map.Add(vecs, vecCount, mapOffset);
370				if (addStatus != B_OK) {
371					// only clobber the status in case of failure
372					status = addStatus;
373				}
374
375				if (status != B_BUFFER_OVERFLOW)
376					break;
377
378				// when we are here, the map has been stored in the array, and
379				// the array size was still too small to cover the whole file
380				vecCount = maxVecs;
381			}
382		}
383
384		//mutex_unlock(&ref->cache->lock);
385	}
386
387	if (status != B_OK) {
388		// We must invalidate the (part of the) map we already
389		// have, as we cannot know if it's complete or not
390		ref->map.Free();
391		return status;
392	}
393
394	// We now have cached the map of this file, we now need to
395	// translate it for the requested access.
396
397	uint32 index;
398	file_extent *fileExtent = find_file_extent(ref, offset, &index);
399	if (fileExtent == NULL) {
400		// access outside file bounds? But that's not our problem
401		*_count = 0;
402		return B_OK;
403	}
404
405	offset -= fileExtent->offset;
406	vecs[0].offset = fileExtent->disk.offset + offset;
407	vecs[0].length = fileExtent->disk.length - offset;
408
409	if (vecs[0].length >= size || index >= ref->map.count - 1) {
410		*_count = 1;
411		return B_OK;
412	}
413
414	// copy the rest of the vecs
415
416	size -= vecs[0].length;
417
418	for (index = 1; index < ref->map.count;) {
419		fileExtent++;
420
421		vecs[index] = fileExtent->disk;
422		index++;
423
424		if (size <= fileExtent->disk.length)
425			break;
426
427		if (index >= maxVecs) {
428			*_count = index;
429			return B_BUFFER_OVERFLOW;
430		}
431
432		size -= fileExtent->disk.length;
433	}
434
435	*_count = index;
436	return B_OK;
437}
438
439
440/*!
441	Does the dirty work of translating the request into actual disk offsets
442	and reads to or writes from the supplied iovecs as specified by \a doWrite.
443*/
444static status_t
445pages_io(file_cache_ref *ref, off_t offset, const iovec *vecs, size_t count,
446	size_t *_numBytes, bool doWrite)
447{
448	TRACE(("pages_io: ref = %p, offset = %lld, size = %lu, vecCount = %lu, %s\n", ref, offset,
449		*_numBytes, count, doWrite ? "write" : "read"));
450
451	// translate the iovecs into direct device accesses
452	file_io_vec fileVecs[MAX_FILE_IO_VECS];
453	size_t fileVecCount = MAX_FILE_IO_VECS;
454	size_t numBytes = *_numBytes;
455
456	status_t status = get_file_map(ref, offset, numBytes, fileVecs,
457		&fileVecCount);
458	if (status < B_OK && status != B_BUFFER_OVERFLOW) {
459		TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n", offset,
460			numBytes, strerror(status)));
461		return status;
462	}
463
464	bool bufferOverflow = status == B_BUFFER_OVERFLOW;
465
466#ifdef TRACE_FILE_CACHE
467	dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount, offset, numBytes,
468		bufferOverflow ? " (array too small)" : "");
469	for (size_t i = 0; i < fileVecCount; i++) {
470		dprintf("  [%lu] offset = %lld, size = %lld\n",
471			i, fileVecs[i].offset, fileVecs[i].length);
472	}
473#endif
474
475	if (fileVecCount == 0) {
476		// There are no file vecs at this offset, so we're obviously trying
477		// to access the file outside of its bounds
478		TRACE(("pages_io: access outside of vnode %p at offset %lld\n",
479			ref->vnode, offset));
480		return B_BAD_VALUE;
481	}
482
483	uint32 fileVecIndex;
484	size_t size;
485
486	if (!doWrite) {
487		// now directly read the data from the device
488		// the first file_io_vec can be read directly
489
490		size = fileVecs[0].length;
491		if (size > numBytes)
492			size = numBytes;
493
494		status = vfs_read_pages(ref->device, ref->cookie, fileVecs[0].offset, vecs,
495			count, &size, false);
496		if (status < B_OK)
497			return status;
498
499		// TODO: this is a work-around for buggy device drivers!
500		//	When our own drivers honour the length, we can:
501		//	a) also use this direct I/O for writes (otherwise, it would
502		//	   overwrite precious data)
503		//	b) panic if the term below is true (at least for writes)
504		if (size > fileVecs[0].length) {
505			//dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
506			size = fileVecs[0].length;
507		}
508
509		ASSERT(size <= fileVecs[0].length);
510
511		// If the file portion was contiguous, we're already done now
512		if (size == numBytes)
513			return B_OK;
514
515		// if we reached the end of the file, we can return as well
516		if (size != fileVecs[0].length) {
517			*_numBytes = size;
518			return B_OK;
519		}
520
521		fileVecIndex = 1;
522	} else {
523		fileVecIndex = 0;
524		size = 0;
525	}
526
527	// Too bad, let's process the rest of the file_io_vecs
528
529	size_t totalSize = size;
530
531	// first, find out where we have to continue in our iovecs
532	uint32 i = 0;
533	for (; i < count; i++) {
534		if (size < vecs[i].iov_len)
535			break;
536
537		size -= vecs[i].iov_len;
538	}
539
540	size_t vecOffset = size;
541	size_t bytesLeft = numBytes - size;
542
543	while (true) {
544		for (; fileVecIndex < fileVecCount; fileVecIndex++) {
545			file_io_vec &fileVec = fileVecs[fileVecIndex];
546			off_t fileOffset = fileVec.offset;
547			off_t fileLeft = min_c(fileVec.length, bytesLeft);
548
549			TRACE(("FILE VEC [%lu] length %lld\n", fileVecIndex, fileLeft));
550
551			// process the complete fileVec
552			while (fileLeft > 0) {
553				iovec tempVecs[MAX_TEMP_IO_VECS];
554				uint32 tempCount = 0;
555
556				// size tracks how much of what is left of the current fileVec
557				// (fileLeft) has been assigned to tempVecs
558				size = 0;
559
560				// assign what is left of the current fileVec to the tempVecs
561				for (size = 0; size < fileLeft && i < count
562						&& tempCount < MAX_TEMP_IO_VECS;) {
563					// try to satisfy one iovec per iteration (or as much as
564					// possible)
565
566					// bytes left of the current iovec
567					size_t vecLeft = vecs[i].iov_len - vecOffset;
568					if (vecLeft == 0) {
569						vecOffset = 0;
570						i++;
571						continue;
572					}
573
574					TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
575						i, vecOffset, size));
576
577					// actually available bytes
578					size_t tempVecSize = min_c(vecLeft, fileLeft - size);
579
580					tempVecs[tempCount].iov_base
581						= (void *)((addr_t)vecs[i].iov_base + vecOffset);
582					tempVecs[tempCount].iov_len = tempVecSize;
583					tempCount++;
584
585					size += tempVecSize;
586					vecOffset += tempVecSize;
587				}
588
589				size_t bytes = size;
590				if (doWrite) {
591					status = vfs_write_pages(ref->device, ref->cookie,
592						fileOffset, tempVecs, tempCount, &bytes, false);
593				} else {
594					status = vfs_read_pages(ref->device, ref->cookie,
595						fileOffset, tempVecs, tempCount, &bytes, false);
596				}
597				if (status < B_OK)
598					return status;
599
600				totalSize += bytes;
601				bytesLeft -= size;
602				fileOffset += size;
603				fileLeft -= size;
604				//dprintf("-> file left = %Lu\n", fileLeft);
605
606				if (size != bytes || i >= count) {
607					// there are no more bytes or iovecs, let's bail out
608					*_numBytes = totalSize;
609					return B_OK;
610				}
611			}
612		}
613
614		if (bufferOverflow) {
615			status = get_file_map(ref, offset + totalSize, bytesLeft, fileVecs,
616				&fileVecCount);
617			if (status < B_OK && status != B_BUFFER_OVERFLOW) {
618				TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n",
619					offset, numBytes, strerror(status)));
620				return status;
621			}
622
623			bufferOverflow = status == B_BUFFER_OVERFLOW;
624			fileVecIndex = 0;
625
626#ifdef TRACE_FILE_CACHE
627			dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount,
628				offset + totalSize, numBytes,
629				bufferOverflow ? " (array too small)" : "");
630			for (size_t i = 0; i < fileVecCount; i++) {
631				dprintf("  [%lu] offset = %lld, size = %lld\n",
632					i, fileVecs[i].offset, fileVecs[i].length);
633			}
634#endif
635		} else
636			break;
637	}
638
639	*_numBytes = totalSize;
640	return B_OK;
641}
642
643
644//	#pragma mark -
645
646
647int
648main(int argc, char **argv)
649{
650	file_cache_ref ref;
651	iovec vecs[MAX_IO_VECS];
652	size_t count = 1;
653	size_t numBytes = 10000;
654	off_t offset = 4999;
655
656	set_vecs(vecs, &count, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
657		16, 4096, 8192, 16384, 4096, 4096, -1);
658	set_file_map(0, 2000, 5000, 3000, 10000, 800, 11000, 20, 12000, 30,
659		13000, 70, 14000, 100, 15000, 900, 20000, 30000, -1);
660
661	status_t status = pages_io(&ref, offset, vecs, count, &numBytes, false);
662	if (status < B_OK)
663		fprintf(stderr, "pages_io() returned: %s\n", strerror(status));
664
665	return 0;
666}
667
668