/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


//! Operations on file descriptors


#include <fd.h>

#include <stdlib.h>
#include <string.h>

#include <OS.h>

#include <AutoDeleter.h>

#include <syscalls.h>
#include <syscall_restart.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <wait_for_objects.h>

#include "vfs_tracing.h"


//#define TRACE_FD
#ifdef TRACE_FD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x)
#endif


static const size_t kMaxReadDirBufferSize = 64 * 1024;


static struct file_descriptor* get_fd_locked(struct io_context* context,
	int fd);
static struct file_descriptor* remove_fd(struct io_context* context, int fd);
static void deselect_select_infos(file_descriptor* descriptor,
	select_info* infos);


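/*!	FDGetterLocking/FDGetter implement a small RAII helper on top of
	AutoLocker: the getter holds a reference to a file_descriptor and
	put_fd()s it automatically when it goes out of scope.

	A minimal usage sketch (hypothetical caller):

		FDGetter fdGetter;
		file_descriptor* descriptor = fdGetter.SetTo(fd, kernel);
		if (descriptor == NULL)
			return B_FILE_ERROR;
		// use descriptor -- the reference is released when fdGetter
		// goes out of scope
*/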
struct FDGetterLocking {
	inline bool Lock(file_descriptor* /*lockable*/)
	{
		return false;
	}

	inline void Unlock(file_descriptor* lockable)
	{
		put_fd(lockable);
	}
};

class FDGetter : public AutoLocker<file_descriptor, FDGetterLocking> {
public:
	inline FDGetter()
		: AutoLocker<file_descriptor, FDGetterLocking>()
	{
	}

	inline FDGetter(io_context* context, int fd, bool contextLocked = false)
		: AutoLocker<file_descriptor, FDGetterLocking>(
			contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd))
	{
	}

	inline file_descriptor* SetTo(io_context* context, int fd,
		bool contextLocked = false)
	{
		file_descriptor* descriptor
			= contextLocked ? get_fd_locked(context, fd) : get_fd(context, fd);
		AutoLocker<file_descriptor, FDGetterLocking>::SetTo(descriptor, true);
		return descriptor;
	}

	inline file_descriptor* SetTo(int fd, bool kernel,
		bool contextLocked = false)
	{
		return SetTo(get_current_io_context(kernel), fd, contextLocked);
	}

	inline file_descriptor* FD() const
	{
		return fLockable;
	}
};


//	#pragma mark - General fd routines


#ifdef DEBUG
void dump_fd(int fd, struct file_descriptor* descriptor);

void
dump_fd(int fd, struct file_descriptor* descriptor)
{
	dprintf("fd[%d] = %p: type = %ld, ref_count = %ld, ops = %p, u.vnode = %p, "
		"u.mount = %p, cookie = %p, open_mode = %lx, pos = %Ld\n",
		fd, descriptor, descriptor->type, descriptor->ref_count,
		descriptor->ops, descriptor->u.vnode, descriptor->u.mount,
		descriptor->cookie, descriptor->open_mode, descriptor->pos);
}
#endif


/*!	Allocates and initializes a new file_descriptor.
*/
struct file_descriptor*
alloc_fd(void)
{
	file_descriptor* descriptor
		= (file_descriptor*)malloc(sizeof(struct file_descriptor));
	if (descriptor == NULL)
		return NULL;

	descriptor->u.vnode = NULL;
	descriptor->cookie = NULL;
	descriptor->ref_count = 1;
	descriptor->open_count = 0;
	descriptor->open_mode = 0;
	descriptor->pos = 0;

	return descriptor;
}


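/*!	Returns whether the close-on-exec flag is set for the given FD. The flags
	are stored as a bitmap in the I/O context, one bit per FD.
*/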
bool
fd_close_on_exec(struct io_context* context, int fd)
{
	return CHECK_BIT(context->fds_close_on_exec[fd / 8], fd & 7) ? true : false;
}


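/*!	Sets or clears the close-on-exec flag for the given FD in the context's
	close-on-exec bitmap.
*/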
void
fd_set_close_on_exec(struct io_context* context, int fd, bool closeFD)
{
	if (closeFD)
		context->fds_close_on_exec[fd / 8] |= (1 << (fd & 7));
	else
		context->fds_close_on_exec[fd / 8] &= ~(1 << (fd & 7));
}


/*!	Searches for a free slot in the FD table of the provided I/O context, and
	inserts the specified descriptor into it.
*/
int
new_fd_etc(struct io_context* context, struct file_descriptor* descriptor,
	int firstIndex)
{
	int fd = -1;
	uint32 i;

	mutex_lock(&context->io_mutex);

	for (i = firstIndex; i < context->table_size; i++) {
		if (!context->fds[i]) {
			fd = i;
			break;
		}
	}
	if (fd < 0) {
		fd = B_NO_MORE_FDS;
		goto err;
	}

	TFD(NewFD(context, fd, descriptor));

	context->fds[fd] = descriptor;
	context->num_used_fds++;
	atomic_add(&descriptor->open_count, 1);

err:
	mutex_unlock(&context->io_mutex);

	return fd;
}


int
new_fd(struct io_context* context, struct file_descriptor* descriptor)
{
	return new_fd_etc(context, descriptor, 0);
}


/*!	Reduces the descriptor's reference counter, and frees all resources
	when it's no longer used.
*/
void
put_fd(struct file_descriptor* descriptor)
{
	int32 previous = atomic_add(&descriptor->ref_count, -1);

	TFD(PutFD(descriptor));

	TRACE(("put_fd(descriptor = %p [ref = %ld, cookie = %p])\n",
		descriptor, descriptor->ref_count, descriptor->cookie));

	// free the descriptor if we don't need it anymore
	if (previous == 1) {
		// free the underlying object
		if (descriptor->ops != NULL && descriptor->ops->fd_free != NULL)
			descriptor->ops->fd_free(descriptor);

		free(descriptor);
	} else if ((descriptor->open_mode & O_DISCONNECTED) != 0
		&& previous - 1 == descriptor->open_count
		&& descriptor->ops != NULL) {
		// the descriptor has been disconnected - it cannot
		// be accessed anymore, let's close it (no one is
		// currently accessing this descriptor)

		if (descriptor->ops->fd_close)
			descriptor->ops->fd_close(descriptor);
		if (descriptor->ops->fd_free)
			descriptor->ops->fd_free(descriptor);

		// prevent this descriptor from being closed/freed again
		descriptor->open_count = -1;
		descriptor->ref_count = -1;
		descriptor->ops = NULL;
		descriptor->u.vnode = NULL;

		// the file descriptor is kept intact, so that it's not
		// reused until someone explicitly closes it
	}
}


/*!	Decrements the open counter of the file descriptor and invokes
	its close hook when appropriate.
*/
void
close_fd(struct file_descriptor* descriptor)
{
	if (atomic_add(&descriptor->open_count, -1) == 1) {
		vfs_unlock_vnode_if_locked(descriptor);

		if (descriptor->ops != NULL && descriptor->ops->fd_close != NULL)
			descriptor->ops->fd_close(descriptor);
	}
}


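/*!	Removes the descriptor from the given FD slot, closes it, and releases
	the reference held by the slot. Returns \c B_FILE_ERROR if the slot was
	empty (or the descriptor had been disconnected).
*/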
status_t
close_fd_index(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = remove_fd(context, fd);

	if (descriptor == NULL)
		return B_FILE_ERROR;

	close_fd(descriptor);
	put_fd(descriptor);
		// the reference associated with the slot

	return B_OK;
}


/*!	This descriptor's underlying object will be closed and freed as soon as
	possible (in one of the next calls to put_fd() - get_fd() will no longer
	succeed on this descriptor).
	This is useful if the underlying object is gone, for instance when a
	(mounted) volume got removed unexpectedly.
*/
void
disconnect_fd(struct file_descriptor* descriptor)
{
	descriptor->open_mode |= O_DISCONNECTED;
}


void
inc_fd_ref_count(struct file_descriptor* descriptor)
{
	atomic_add(&descriptor->ref_count, 1);
}


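/*!	Looks up the descriptor for the given FD and acquires a reference to it.
	Returns \c NULL if the FD is out of range, unused, or disconnected.
	The caller must hold the context's \c io_mutex.
*/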
static struct file_descriptor*
get_fd_locked(struct io_context* context, int fd)
{
	if (fd < 0 || (uint32)fd >= context->table_size)
		return NULL;

	struct file_descriptor* descriptor = context->fds[fd];

	if (descriptor != NULL) {
		// Disconnected descriptors cannot be accessed anymore
		if (descriptor->open_mode & O_DISCONNECTED)
			descriptor = NULL;
		else {
			TFD(GetFD(context, fd, descriptor));
			inc_fd_ref_count(descriptor);
		}
	}

	return descriptor;
}


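/*!	Locking wrapper around get_fd_locked(): acquires the context's
	\c io_mutex for the lookup.
*/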
struct file_descriptor*
get_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	return get_fd_locked(context, fd);
}


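/*!	Like get_fd(), but additionally acquires an open reference
	(\c open_count) to the descriptor, so that its close hook cannot run
	while the caller is using it.
*/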
struct file_descriptor*
get_open_fd(struct io_context* context, int fd)
{
	MutexLocker _(context->io_mutex);

	file_descriptor* descriptor = get_fd_locked(context, fd);
	if (descriptor == NULL)
		return NULL;

	atomic_add(&descriptor->open_count, 1);

	return descriptor;
}


/*!	Removes the file descriptor from the specified slot.
*/
static struct file_descriptor*
remove_fd(struct io_context* context, int fd)
{
	struct file_descriptor* descriptor = NULL;

	if (fd < 0)
		return NULL;

	mutex_lock(&context->io_mutex);

	if ((uint32)fd < context->table_size)
		descriptor = context->fds[fd];

	select_info* selectInfos = NULL;
	bool disconnected = false;

	if (descriptor != NULL) {
		// fd is valid
		TFD(RemoveFD(context, fd, descriptor));

		context->fds[fd] = NULL;
		fd_set_close_on_exec(context, fd, false);
		context->num_used_fds--;

		selectInfos = context->select_infos[fd];
		context->select_infos[fd] = NULL;

		disconnected = (descriptor->open_mode & O_DISCONNECTED);
	}

	mutex_unlock(&context->io_mutex);

	if (selectInfos != NULL)
		deselect_select_infos(descriptor, selectInfos);

	return disconnected ? NULL : descriptor;
}


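/*!	Duplicates the given FD into the lowest free slot of the current I/O
	context. The close-on-exec flag of the new FD is cleared.
*/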
static int
dup_fd(int fd, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);
	struct file_descriptor* descriptor;
	int status;

	TRACE(("dup_fd: fd = %d\n", fd));

	// Try to get the fd structure
	descriptor = get_fd(context, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// now put the fd in place
	status = new_fd(context, descriptor);
	if (status < 0)
		put_fd(descriptor);
	else {
		mutex_lock(&context->io_mutex);
		fd_set_close_on_exec(context, status, false);
		mutex_unlock(&context->io_mutex);
	}

	return status;
}


/*!	POSIX says this should be the same as:
		close(newfd);
		fcntl(oldfd, F_DUPFD, newfd);

	We do dup2() directly to be thread-safe.
*/
static int
dup2_fd(int oldfd, int newfd, bool kernel)
{
	struct file_descriptor* evicted = NULL;
	struct io_context* context;

	TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd));

	// quick check
	if (oldfd < 0 || newfd < 0)
		return B_FILE_ERROR;

	// Get current I/O context and lock it
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// Check if the fds are valid (mutex must be locked because
	// the table size could be changed)
	if ((uint32)oldfd >= context->table_size
		|| (uint32)newfd >= context->table_size
		|| context->fds[oldfd] == NULL) {
		mutex_unlock(&context->io_mutex);
		return B_FILE_ERROR;
	}

	// Check for identity. Note that this check cannot be done above,
	// because we always want to return an error on invalid
	// handles.
	select_info* selectInfos = NULL;
	if (oldfd != newfd) {
		// Now do the work
		TFD(Dup2FD(context, oldfd, newfd));

		evicted = context->fds[newfd];
		selectInfos = context->select_infos[newfd];
		context->select_infos[newfd] = NULL;
		atomic_add(&context->fds[oldfd]->ref_count, 1);
		atomic_add(&context->fds[oldfd]->open_count, 1);
		context->fds[newfd] = context->fds[oldfd];

		if (evicted == NULL)
			context->num_used_fds++;
	}

	fd_set_close_on_exec(context, newfd, false);

	mutex_unlock(&context->io_mutex);

	// Say bye bye to the evicted fd
	if (evicted) {
		deselect_select_infos(evicted, selectInfos);
		close_fd(evicted);
		put_fd(evicted);
	}

	return newfd;
}


/*!	Duplicates an FD from another team to this/the kernel team.
	\param fromTeam The team which owns the FD.
	\param fd The FD to duplicate.
	\param kernel If \c true, the new FD will be created in the kernel team,
			the current userland team otherwise.
	\return The newly created FD or an error code, if something went wrong.
*/
int
dup_foreign_fd(team_id fromTeam, int fd, bool kernel)
{
	// get the I/O context for the team in question
	Team* team = Team::Get(fromTeam);
	if (team == NULL)
		return B_BAD_TEAM_ID;
	BReference<Team> teamReference(team, true);

	io_context* fromContext = team->io_context;

	// get the file descriptor
	file_descriptor* descriptor = get_fd(fromContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	CObjectDeleter<file_descriptor> descriptorPutter(descriptor, put_fd);

	// create a new FD in the target I/O context
	int result = new_fd(get_current_io_context(kernel), descriptor);
	if (result >= 0) {
		// the descriptor reference belongs to the slot, now
		descriptorPutter.Detach();
	}

	return result;
}


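/*!	Dispatches an ioctl() to the descriptor's \c fd_ioctl hook. Unsupported
	operations are reported as \c ENOTTY.
*/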
static status_t
fd_ioctl(bool kernelFD, int fd, uint32 op, void* buffer, size_t length)
{
	struct file_descriptor* descriptor;
	int status;

	descriptor = get_fd(get_current_io_context(kernelFD), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_ioctl)
		status = descriptor->ops->fd_ioctl(descriptor, op, buffer, length);
	else
		status = B_DEV_INVALID_IOCTL;

	if (status == B_DEV_INVALID_IOCTL)
		status = ENOTTY;

	put_fd(descriptor);
	return status;
}


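/*!	Deselects all events for the given list of select infos, notifies the
	waiters with \c B_EVENT_INVALID, and releases the references to their
	sync objects.
*/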
static void
deselect_select_infos(file_descriptor* descriptor, select_info* infos)
{
	TRACE(("deselect_select_infos(%p, %p)\n", descriptor, infos));

	select_info* info = infos;
	while (info != NULL) {
		select_sync* sync = info->sync;

		// deselect the selected events
		uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
		if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
			for (uint16 event = 1; event < 16; event++) {
				if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
					descriptor->ops->fd_deselect(descriptor, event,
						(selectsync*)info);
				}
			}
		}

		notify_select_events(info, B_EVENT_INVALID);
		info = info->next;
		put_select_sync(sync);
	}
}


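/*!	Registers the given select_info for the specified FD: the requested
	events are selected via the descriptor's \c fd_select hook and the info
	is appended to the context's \c select_infos list for that FD, keeping a
	reference to its sync object. While the hooks are called, an extra open
	reference is held so that the descriptor cannot be closed concurrently.
*/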
status_t
select_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("select_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	uint16 eventsToSelect = info->selected_events & ~B_EVENT_INVALID;

	if (descriptor->ops->fd_select == NULL && eventsToSelect != 0) {
		// if the I/O subsystem doesn't support select(), we will
		// immediately notify the select call
		return notify_select_events(info, eventsToSelect);
	}

	// We need the FD to stay open while we're doing this, so no select()/
	// deselect() will be called on it after it is closed.
	atomic_add(&descriptor->open_count, 1);

	locker.Unlock();

	// select any events asked for
	uint32 selectedEvents = 0;

	for (uint16 event = 1; event < 16; event++) {
		if ((eventsToSelect & SELECT_FLAG(event)) != 0
			&& descriptor->ops->fd_select(descriptor, event,
				(selectsync*)info) == B_OK) {
			selectedEvents |= SELECT_FLAG(event);
		}
	}
	info->selected_events = selectedEvents
		| (info->selected_events & B_EVENT_INVALID);

	// Add the info to the IO context. Even if nothing has been selected -- we
	// always support B_EVENT_INVALID.
	locker.Lock();
	if (context->fds[fd] != descriptor) {
		// Someone close()d the index in the meantime. deselect() all
		// events.
		info->next = NULL;
		deselect_select_infos(descriptor, info);

		// Release our open reference of the descriptor.
		close_fd(descriptor);
		return B_FILE_ERROR;
	}

	// The FD index hasn't changed, so we add the select info to the table.

	info->next = context->select_infos[fd];
	context->select_infos[fd] = info;

	// As long as the info is in the list, we keep a reference to the sync
	// object.
	atomic_add(&info->sync->ref_count, 1);

	// Finally release our open reference. It is safe just to decrement,
	// since as long as the descriptor is associated with the slot,
	// someone else still has it open.
	atomic_add(&descriptor->open_count, -1);

	return B_OK;
}


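/*!	Reverses select_fd(): removes the given select_info from the context's
	list for the FD, deselects the events via the descriptor's
	\c fd_deselect hook, and releases the reference to the sync object.
*/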
status_t
deselect_fd(int32 fd, struct select_info* info, bool kernel)
{
	TRACE(("deselect_fd(fd = %ld, info = %p (%p), 0x%x)\n", fd, info,
		info->sync, info->selected_events));

	FDGetter fdGetter;
		// define before the context locker, so it will be destroyed after it

	io_context* context = get_current_io_context(kernel);
	MutexLocker locker(context->io_mutex);

	struct file_descriptor* descriptor = fdGetter.SetTo(context, fd, true);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// remove the info from the IO context

	select_info** infoLocation = &context->select_infos[fd];
	while (*infoLocation != NULL && *infoLocation != info)
		infoLocation = &(*infoLocation)->next;

	// If not found, someone else beat us to it.
	if (*infoLocation != info)
		return B_OK;

	*infoLocation = info->next;

	locker.Unlock();

	// deselect the selected events
	uint16 eventsToDeselect = info->selected_events & ~B_EVENT_INVALID;
	if (descriptor->ops->fd_deselect != NULL && eventsToDeselect != 0) {
		for (uint16 event = 1; event < 16; event++) {
			if ((eventsToDeselect & SELECT_FLAG(event)) != 0) {
				descriptor->ops->fd_deselect(descriptor, event,
					(selectsync*)info);
			}
		}
	}

	put_select_sync(info->sync);

	return B_OK;
}


/*!	This function checks if the specified fd is valid in the current
	context. It can be used for a quick check; the fd is not locked
	so it could become invalid immediately after this check.
*/
bool
fd_is_valid(int fd, bool kernel)
{
	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return false;

	put_fd(descriptor);
	return true;
}


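/*!	Returns the vnode associated with the descriptor, if its type refers to
	one (file, directory, attribute, or attribute directory), \c NULL
	otherwise.
*/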
struct vnode*
fd_vnode(struct file_descriptor* descriptor)
{
	switch (descriptor->type) {
		case FDTYPE_FILE:
		case FDTYPE_DIR:
		case FDTYPE_ATTR_DIR:
		case FDTYPE_ATTR:
			return descriptor->u.vnode;
	}

	return NULL;
}


static status_t
common_close(int fd, bool kernel)
{
	return close_fd_index(get_current_io_context(kernel), fd);
}


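/*!	Common back end for _user_read() and _user_write(). A position of -1
	means "use and advance the descriptor's current position". The buffer
	must be a userland address.
*/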
static ssize_t
common_user_io(int fd, off_t pos, void* buffer, size_t length, bool write)
{
	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (!descriptor)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	if (write)
		status = descriptor->ops->fd_write(descriptor, pos, buffer, &length);
	else
		status = descriptor->ops->fd_read(descriptor, pos, buffer, &length);

	if (status != B_OK)
		return status;

	if (movePosition)
		descriptor->pos = pos + length;

	return length <= SSIZE_MAX ? (ssize_t)length : SSIZE_MAX;
}


static ssize_t
common_user_vector_io(int fd, off_t pos, const iovec* userVecs, size_t count,
	bool write)
{
	if (!IS_USER_ADDRESS(userVecs))
		return B_BAD_ADDRESS;

	if (pos < -1)
		return B_BAD_VALUE;

	// prevent integer overflow exploit in malloc()
	if (count > IOV_MAX)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, false);
	if (!descriptor)
		return B_FILE_ERROR;

	if (write ? (descriptor->open_mode & O_RWMASK) == O_RDONLY
			: (descriptor->open_mode & O_RWMASK) == O_WRONLY) {
		return B_FILE_ERROR;
	}

	iovec* vecs = (iovec*)malloc(sizeof(iovec) * count);
	if (vecs == NULL)
		return B_NO_MEMORY;
	MemoryDeleter _(vecs);

	if (user_memcpy(vecs, userVecs, sizeof(iovec) * count) != B_OK)
		return B_BAD_ADDRESS;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (write ? descriptor->ops->fd_write == NULL
			: descriptor->ops->fd_read == NULL) {
		return B_BAD_VALUE;
	}

	SyscallRestartWrapper<status_t> status;

	ssize_t bytesTransferred = 0;
	for (uint32 i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		if (write) {
			status = descriptor->ops->fd_write(descriptor, pos,
				vecs[i].iov_base, &length);
		} else {
			status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
				&length);
		}

		if (status != B_OK) {
			if (bytesTransferred == 0)
				return status;
			status = B_OK;
			break;
		}

		if ((uint64)bytesTransferred + length > SSIZE_MAX)
			bytesTransferred = SSIZE_MAX;
		else
			bytesTransferred += (ssize_t)length;

		pos += length;

		if (length < vecs[i].iov_len)
			break;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesTransferred;
}


status_t
user_fd_kernel_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("user_fd_kernel_ioctl: fd %d\n", fd));

	return fd_ioctl(false, fd, op, buffer, length);
}


//	#pragma mark - User syscalls


ssize_t
_user_read(int fd, off_t pos, void* buffer, size_t length)
{
	return common_user_io(fd, pos, buffer, length, false);
}


ssize_t
_user_readv(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, false);
}


ssize_t
_user_write(int fd, off_t pos, const void* buffer, size_t length)
{
	return common_user_io(fd, pos, (void*)buffer, length, true);
}


ssize_t
_user_writev(int fd, off_t pos, const iovec* userVecs, size_t count)
{
	return common_user_vector_io(fd, pos, userVecs, count, true);
}


off_t
_user_seek(int fd, off_t pos, int seekType)
{
	syscall_64_bit_return_value();

	struct file_descriptor* descriptor;

	descriptor = get_fd(get_current_io_context(false), fd);
	if (!descriptor)
		return B_FILE_ERROR;

	TRACE(("user_seek(descriptor = %p)\n", descriptor));

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_user_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	if (!IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	TRACE(("user_ioctl: fd %d\n", fd));

	SyscallRestartWrapper<status_t> status;

	return status = fd_ioctl(false, fd, op, buffer, length);
}


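/*!	Reads up to \a maxCount directory entries into a kernel heap buffer
	(capped at \c kMaxReadDirBufferSize) and copies the result back to the
	userland buffer. Returns the number of entries read or an error code.
*/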
ssize_t
_user_read_dir(int fd, struct dirent* userBuffer, size_t bufferSize,
	uint32 maxCount)
{
	TRACE(("user_read_dir(fd = %d, userBuffer = %p, bufferSize = %ld, count = "
		"%lu)\n", fd, userBuffer, bufferSize, maxCount));

	if (maxCount == 0)
		return 0;

	if (userBuffer == NULL || !IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	// get I/O context and FD
	io_context* ioContext = get_current_io_context(false);
	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(ioContext, fd, false);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir == NULL)
		return B_UNSUPPORTED;

	// restrict buffer size and allocate a heap buffer
	if (bufferSize > kMaxReadDirBufferSize)
		bufferSize = kMaxReadDirBufferSize;
	struct dirent* buffer = (struct dirent*)malloc(bufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;
	MemoryDeleter bufferDeleter(buffer);

	// read the directory
	uint32 count = maxCount;
	status_t status = descriptor->ops->fd_read_dir(ioContext, descriptor,
		buffer, bufferSize, &count);
	if (status != B_OK)
		return status;

	// copy the buffer back -- determine the total buffer size first
	size_t sizeToCopy = 0;
	struct dirent* entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		size_t length = entry->d_reclen;
		sizeToCopy += length;
		entry = (struct dirent*)((uint8*)entry + length);
	}

	if (user_memcpy(userBuffer, buffer, sizeToCopy) != B_OK)
		return B_BAD_ADDRESS;

	return count;
}


status_t
_user_rewind_dir(int fd)
{
	struct file_descriptor* descriptor;
	status_t status;

	TRACE(("user_rewind_dir(fd = %d)\n", fd));

	descriptor = get_fd(get_current_io_context(false), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	return status;
}


status_t
_user_close(int fd)
{
	return common_close(fd, false);
}


int
_user_dup(int fd)
{
	return dup_fd(fd, false);
}


int
_user_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, false);
}


//	#pragma mark - Kernel calls


ssize_t
_kern_read(int fd, off_t pos, void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	SyscallFlagUnsetter _;

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	ssize_t bytesRead = descriptor->ops->fd_read(descriptor, pos, buffer,
		&length);
	if (bytesRead >= B_OK) {
		if (length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesRead;
}


ssize_t
_kern_readv(int fd, off_t pos, const iovec* vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_WRONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_read == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesRead = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_read(descriptor, pos, vecs[i].iov_base,
			&length);
		if (status != B_OK) {
			bytesRead = status;
			break;
		}

		if ((uint64)bytesRead + length > SSIZE_MAX)
			bytesRead = SSIZE_MAX;
		else
			bytesRead += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesRead;
}


ssize_t
_kern_write(int fd, off_t pos, const void* buffer, size_t length)
{
	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (descriptor == NULL)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	bool movePosition = false;
	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = descriptor->ops->fd_write(descriptor, pos, buffer,
		&length);
	if (bytesWritten >= B_OK) {
		if (length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten = (ssize_t)length;

		if (movePosition)
			descriptor->pos = pos + length;
	}

	return bytesWritten;
}


ssize_t
_kern_writev(int fd, off_t pos, const iovec* vecs, size_t count)
{
	bool movePosition = false;
	status_t status;
	uint32 i;

	if (pos < -1)
		return B_BAD_VALUE;

	FDGetter fdGetter;
	struct file_descriptor* descriptor = fdGetter.SetTo(fd, true);

	if (!descriptor)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY)
		return B_FILE_ERROR;

	if (pos == -1) {
		pos = descriptor->pos;
		movePosition = true;
	}

	if (descriptor->ops->fd_write == NULL)
		return B_BAD_VALUE;

	SyscallFlagUnsetter _;

	ssize_t bytesWritten = 0;

	for (i = 0; i < count; i++) {
		size_t length = vecs[i].iov_len;
		status = descriptor->ops->fd_write(descriptor, pos,
			vecs[i].iov_base, &length);
		if (status != B_OK) {
			bytesWritten = status;
			break;
		}

		if ((uint64)bytesWritten + length > SSIZE_MAX)
			bytesWritten = SSIZE_MAX;
		else
			bytesWritten += (ssize_t)length;

		pos += vecs[i].iov_len;
	}

	if (movePosition)
		descriptor->pos = pos;

	return bytesWritten;
}


off_t
_kern_seek(int fd, off_t pos, int seekType)
{
	struct file_descriptor* descriptor;

	descriptor = get_fd(get_current_io_context(true), fd);
	if (!descriptor)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_seek)
		pos = descriptor->ops->fd_seek(descriptor, pos, seekType);
	else
		pos = ESPIPE;

	put_fd(descriptor);
	return pos;
}


status_t
_kern_ioctl(int fd, uint32 op, void* buffer, size_t length)
{
	TRACE(("kern_ioctl: fd %d\n", fd));

	SyscallFlagUnsetter _;

	return fd_ioctl(true, fd, op, buffer, length);
}


ssize_t
_kern_read_dir(int fd, struct dirent* buffer, size_t bufferSize,
	uint32 maxCount)
{
	struct file_descriptor* descriptor;
	ssize_t retval;

	TRACE(("sys_read_dir(fd = %d, buffer = %p, bufferSize = %ld, count = "
		"%lu)\n",fd, buffer, bufferSize, maxCount));

	struct io_context* ioContext = get_current_io_context(true);
	descriptor = get_fd(ioContext, fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_read_dir) {
		uint32 count = maxCount;
		retval = descriptor->ops->fd_read_dir(ioContext, descriptor, buffer,
			bufferSize, &count);
		if (retval >= 0)
			retval = count;
	} else
		retval = B_UNSUPPORTED;

	put_fd(descriptor);
	return retval;
}


status_t
_kern_rewind_dir(int fd)
{
	struct file_descriptor* descriptor;
	status_t status;

	TRACE(("sys_rewind_dir(fd = %d)\n",fd));

	descriptor = get_fd(get_current_io_context(true), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	if (descriptor->ops->fd_rewind_dir)
		status = descriptor->ops->fd_rewind_dir(descriptor);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	return status;
}


status_t
_kern_close(int fd)
{
	return common_close(fd, true);
}


int
_kern_dup(int fd)
{
	return dup_fd(fd, true);
}


int
_kern_dup2(int ofd, int nfd)
{
	return dup2_fd(ofd, nfd, true);
}