1/*
2 * Copyright 2007-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
3 * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
4 * Distributed under the terms of the MIT License.
5 */
6
7#include <fs/select_sync_pool.h>
8#include <wait_for_objects.h>
9
10#include <new>
11
12#include <poll.h>
13#include <signal.h>
14#include <stdlib.h>
15#include <string.h>
16#include <sys/select.h>
17
18#include <OS.h>
19#include <Select.h>
20
21#include <AutoDeleter.h>
22#include <StackOrHeapArray.h>
23
24#include <event_queue.h>
25#include <fs/fd.h>
26#include <port.h>
27#include <sem.h>
28#include <syscalls.h>
29#include <syscall_restart.h>
30#include <thread.h>
31#include <tracing.h>
32#include <util/AutoLock.h>
33#include <util/DoublyLinkedList.h>
34#include <vfs.h>
35
36#include "select_ops.h"
37#include "select_sync.h"
38
39
40//#define TRACE_WAIT_FOR_OBJECTS
41#ifdef TRACE_WAIT_FOR_OBJECTS
42#	define PRINT(x) dprintf x
43#	define FUNCTION(x) dprintf x
44#else
45#	define PRINT(x) ;
46#	define FUNCTION(x) ;
47#endif
48
49
50using std::nothrow;
51
52
// One pool member: a selectsync (a select() client) together with the
// bitmask of SELECT_FLAG() event bits it is currently interested in.
struct select_sync_pool_entry
	: DoublyLinkedListLinkImpl<select_sync_pool_entry> {
	selectsync			*sync;
	uint16				events;	// ORed SELECT_FLAG(B_SELECT_xxx) bits
};

typedef DoublyLinkedList<select_sync_pool_entry> SelectSyncPoolEntryList;

// Groups all selectsyncs currently selecting on the same resource so a
// single notification can be fanned out to all of them
// (cf. notify_select_event_pool()).
struct select_sync_pool {
	SelectSyncPoolEntryList	entries;
};
64
65
// select_sync implementation backing common_select()/common_poll()/
// common_wait_for_objects(): the waiting thread blocks on `sem`, which
// Notify() releases whenever a selected event arrives on one of the
// `count` select_info slots in `set`.
struct wait_for_objects_sync : public select_sync {
	sem_id				sem;	// released once per matching notification
	uint32				count;	// number of elements in `set`
	struct select_info*	set;	// one select_info per FD/object, owned here

	virtual ~wait_for_objects_sync();
	virtual status_t Notify(select_info* info, uint16 events);
};
74
75
// Out-of-line definition anchors the vtable; the base class itself owns
// nothing -- derived classes release their own resources.
select_sync::~select_sync()
{
}
79
80
81#if WAIT_FOR_OBJECTS_TRACING
82
83
84namespace WaitForObjectsTracing {
85
86
// Base class for select() trace entries: snapshots the three fd_sets into
// the tracing buffer at construction time so they can be dumped later.
class SelectTraceEntry : public AbstractTraceEntry {
	protected:
		SelectTraceEntry(int count, fd_set* readSet, fd_set* writeSet,
			fd_set* errorSet)
			:
			fReadSet(NULL),
			fWriteSet(NULL),
			fErrorSet(NULL),
			fCount(count)
		{
			// Copy all non-NULL sets into one tracing buffer allocation;
			// if the allocation fails, the member pointers stay NULL and
			// AddDump() prints empty sets.
			int sets = (readSet != NULL ? 1 : 0) + (writeSet != NULL ? 1 : 0)
				+ (errorSet != NULL ? 1 : 0);
			if (sets > 0 && count > 0) {
				// size of one variable-length kernel fd_set for `count` FDs
				uint32 bytes = _howmany(count, NFDBITS) * sizeof(fd_mask);
				uint8* allocated = (uint8*)alloc_tracing_buffer(bytes * sets);
				if (allocated != NULL) {
					if (readSet != NULL) {
						fReadSet = (fd_set*)allocated;
						memcpy(fReadSet, readSet, bytes);
						allocated += bytes;
					}
					if (writeSet != NULL) {
						fWriteSet = (fd_set*)allocated;
						memcpy(fWriteSet, writeSet, bytes);
						allocated += bytes;
					}
					if (errorSet != NULL) {
						fErrorSet = (fd_set*)allocated;
						memcpy(fErrorSet, errorSet, bytes);
					}
				}
			}
		}

		// Prints "<name>read: <...>, write: <...>, error: <...>".
		void AddDump(TraceOutput& out, const char* name)
		{
			out.Print(name);

			_PrintSet(out, "read", fReadSet);
			_PrintSet(out, ", write", fWriteSet);
			_PrintSet(out, ", error", fErrorSet);
		}

	private:
		// Prints the FD numbers set in `set` as "name: <0, 3, 7>";
		// a NULL set prints as an empty list.
		void _PrintSet(TraceOutput& out, const char* name, fd_set* set)
		{

			out.Print("%s: <", name);

			if (set != NULL) {
				bool first = true;
				for (int i = 0; i < fCount; i++) {
					if (!FD_ISSET(i, set))
						continue;

					if (first) {
						out.Print("%d", i);
						first = false;
					} else
						out.Print(", %d", i);
				}
			}

			out.Print(">");
		}

	protected:
		fd_set*	fReadSet;	// snapshots in the tracing buffer (may be NULL)
		fd_set*	fWriteSet;
		fd_set*	fErrorSet;
		int		fCount;		// number of FDs the sets were sized for
};
159
160
// Trace entry recorded when a select() wait starts; also remembers the
// (absolute) timeout passed to the call.
class SelectBegin : public SelectTraceEntry {
	public:
		SelectBegin(int count, fd_set* readSet, fd_set* writeSet,
			fd_set* errorSet, bigtime_t timeout)
			:
			SelectTraceEntry(count, readSet, writeSet, errorSet),
			fTimeout(timeout)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			SelectTraceEntry::AddDump(out, "select begin: ");
			out.Print(", timeout: %" B_PRIdBIGTIME, fTimeout);
		}

	private:
		bigtime_t	fTimeout;
};
181
182
// Trace entry recorded when a select() wait finishes. On error the sets
// are not snapshotted (count forced to 0) and only the status is dumped.
class SelectDone : public SelectTraceEntry {
	public:
		SelectDone(int count, fd_set* readSet, fd_set* writeSet,
			fd_set* errorSet, status_t status)
			:
			SelectTraceEntry(status == B_OK ? count : 0, readSet, writeSet,
				errorSet),
			fStatus(status)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			if (fStatus == B_OK)
				SelectTraceEntry::AddDump(out, "select done:  ");
			else
				out.Print("select done:  error: %#" B_PRIx32, fStatus);
		}

	private:
		status_t	fStatus;
};
206
207
// Base class for poll() trace entries: snapshots the fd/events pairs of
// all pollfds with a non-zero (r)events mask into the tracing buffer.
class PollTraceEntry : public AbstractTraceEntry {
	protected:
		// If `resultEvents` is true the revents fields are captured
		// (poll done), otherwise the requested events (poll begin).
		PollTraceEntry(pollfd* fds, int count, bool resultEvents)
			:
			fEntries(NULL),
			fCount(0)
		{
			// First pass: count the entries worth recording.
			if (fds != NULL && count > 0) {
				for (int i = 0; i < count; i++) {
					if (resultEvents ? fds[i].revents : fds[i].events)
						fCount++;
				}
			}

			if (fCount == 0)
				return;

			fEntries = (FDEntry*)alloc_tracing_buffer(fCount * sizeof(FDEntry));
			if (fEntries != NULL) {
				// Second pass: copy exactly the fCount entries counted
				// above. Note that `i` only advances on a match while
				// `fds` advances every iteration; the loop terminates
				// because the first pass guarantees fCount matching
				// entries exist in the array.
				for (int i = 0; i < fCount; fds++) {
					uint16 events = resultEvents ? fds->revents : fds->events;
					if (events != 0) {
						fEntries[i].fd = fds->fd;
						fEntries[i].events = events;
						i++;
					}
				}
			}
		}

		// Prints the recorded entries as "<fd: ev, ev>, <fd: ev>" using
		// short mnemonics for the POLLxxx flags.
		void AddDump(TraceOutput& out)
		{
			if (fEntries == NULL)
				return;

			static const struct {
				const char*	name;
				uint16		event;
			} kEventNames[] = {
				{ "r", POLLIN },
				{ "w", POLLOUT },
				{ "rb", POLLRDBAND },
				{ "wb", POLLWRBAND },
				{ "rp", POLLPRI },
				{ "err", POLLERR },
				{ "hup", POLLHUP },
				{ "inv", POLLNVAL },
				{ NULL, 0 }
			};

			bool firstFD = true;
			for (int i = 0; i < fCount; i++) {
				if (firstFD) {
					out.Print("<%u: ", fEntries[i].fd);
					firstFD = false;
				} else
					out.Print(", <%u: ", fEntries[i].fd);

				bool firstEvent = true;
				for (int k = 0; kEventNames[k].name != NULL; k++) {
					if ((fEntries[i].events & kEventNames[k].event) != 0) {
						if (firstEvent) {
							out.Print("%s", kEventNames[k].name);
							firstEvent = false;
						} else
							out.Print(", %s", kEventNames[k].name);
					}
				}

				out.Print(">");
			}
		}

	protected:
		struct FDEntry {
			// NOTE(review): pollfd::fd is an int; storing it in a uint16
			// truncates large FD numbers -- tracing display only, but
			// worth confirming this is acceptable.
			uint16	fd;
			uint16	events;
		};

		FDEntry*	fEntries;	// tracing-buffer array (may be NULL)
		int			fCount;		// number of recorded entries
};
290
291
// Trace entry recorded when a poll() wait starts; captures the requested
// events plus the (absolute) timeout.
class PollBegin : public PollTraceEntry {
	public:
		PollBegin(pollfd* fds, int count, bigtime_t timeout)
			:
			PollTraceEntry(fds, count, false),
			fTimeout(timeout)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("poll begin: ");
			PollTraceEntry::AddDump(out);
			out.Print(", timeout: %" B_PRIdBIGTIME, fTimeout);
		}

	private:
		bigtime_t	fTimeout;
};
312
313
// Trace entry recorded when a poll() wait finishes; captures the revents
// on success, or only the error code on failure (count forced to 0).
class PollDone : public PollTraceEntry {
	public:
		PollDone(pollfd* fds, int count, int result)
			:
			PollTraceEntry(fds, result >= 0 ? count : 0, true),
			fResult(result)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			if (fResult >= 0) {
				out.Print("poll done:  count: %d: ", fResult);
				PollTraceEntry::AddDump(out);
			} else
				out.Print("poll done:  error: %#x", fResult);
		}

	private:
		int		fResult;
};
336
337}	// namespace WaitForObjectsTracing
338
339#	define T(x)	new(std::nothrow) WaitForObjectsTracing::x
340
341#else
342#	define T(x)
343#endif	// WAIT_FOR_OBJECTS_TRACING
344
345
346// #pragma mark -
347
348
349/*!
350	Clears all bits in the fd_set - since we are using variable sized
351	arrays in the kernel, we can't use the FD_ZERO() macro provided by
352	sys/select.h for this task.
353	All other FD_xxx() macros are safe to use, though.
354*/
355static inline void
356fd_zero(fd_set *set, int numFDs)
357{
358	if (set != NULL)
359		memset(set, 0, _howmany(numFDs, NFDBITS) * sizeof(fd_mask));
360}
361
362
/*!	Allocates and initializes a wait_for_objects_sync with \a numFDs
	select_info slots and a notification semaphore with a zero count.
	On success ownership is transferred to the caller via \a _sync;
	release it with put_select_sync().
*/
static status_t
create_select_sync(int numFDs, wait_for_objects_sync*& _sync)
{
	// create sync structure
	wait_for_objects_sync* sync = new(nothrow) wait_for_objects_sync;
	if (sync == NULL)
		return B_NO_MEMORY;
	ObjectDeleter<wait_for_objects_sync> syncDeleter(sync);

	// create info set
	sync->set = new(nothrow) select_info[numFDs];
	if (sync->set == NULL)
		return B_NO_MEMORY;
	ArrayDeleter<select_info> setDeleter(sync->set);

	// create select event semaphore
	// (on failure the deleters free the structures allocated above)
	sync->sem = create_sem(0, "select");
	if (sync->sem < 0)
		return sync->sem;

	sync->count = numFDs;

	// link every slot back to its sync, so Notify() can find the
	// semaphore from a select_info pointer
	for (int i = 0; i < numFDs; i++) {
		sync->set[i].next = NULL;
		sync->set[i].sync = sync;
	}

	// everything succeeded -- hand ownership to the caller
	setDeleter.Detach();
	syncDeleter.Detach();
	_sync = sync;

	return B_OK;
}
396
397
398void
399acquire_select_sync(select_sync* sync)
400{
401	FUNCTION(("acquire_select_sync(%p)\n", sync));
402	sync->AcquireReference();
403}
404
405
406void
407put_select_sync(select_sync* sync)
408{
409	FUNCTION(("put_select_sync(%p): -> %ld\n", sync, sync->CountReferences() - 1));
410	sync->ReleaseReference();
411}
412
413
// Deletes the notification semaphore before freeing the select_info
// array. NOTE(review): presumably delete_sem() wakes any thread still
// blocked on the semaphore -- by the time the last reference is dropped
// no waiter should remain, though.
wait_for_objects_sync::~wait_for_objects_sync()
{
	delete_sem(sem);
	delete[] set;
}
419
420
/*!	Records \a events for the given select_info and wakes the waiting
	thread if any of them was actually selected.
	The events are accumulated (atomic_or) before the selected_events
	check, so the waiter collects them even if it wakes for another slot.
*/
status_t
wait_for_objects_sync::Notify(select_info* info, uint16 events)
{
	if (sem < B_OK)
		return B_BAD_VALUE;

	atomic_or(&info->events, events);

	// only wake up the waiting select()/poll() call if the events
	// match one of the selected ones
	if (info->selected_events & events)
		return release_sem_etc(sem, 1, B_DO_NOT_RESCHEDULE);

	return B_OK;
}
436
437
/*!	Backend for the select() family (kernel and user entry points).
	Waits until one of the FDs marked in \a readSet / \a writeSet /
	\a errorSet has a pending event, the absolute \a timeout expires
	(negative \a timeout means wait forever), or a signal interrupts
	the wait.
	On success the sets are rewritten to contain only the FDs with
	events and the number of set bits is returned; on B_INTERRUPTED the
	sets are left untouched for the restart machinery.
*/
static int
common_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
	bigtime_t timeout, const sigset_t *sigMask, bool kernel)
{
	status_t status = B_OK;
	int fd;

	FUNCTION(("[%ld] common_select(%d, %p, %p, %p, %lld, %p, %d)\n",
		find_thread(NULL), numFDs, readSet, writeSet, errorSet, timeout,
		sigMask, kernel));

	// check if fds are valid before doing anything

	for (fd = 0; fd < numFDs; fd++) {
		if (((readSet && FD_ISSET(fd, readSet))
			|| (writeSet && FD_ISSET(fd, writeSet))
			|| (errorSet && FD_ISSET(fd, errorSet)))
			&& !fd_is_valid(fd, kernel))
			return B_FILE_ERROR;
	}

	// allocate sync object
	wait_for_objects_sync* sync;
	status = create_select_sync(numFDs, sync);
	if (status != B_OK)
		return status;

	T(SelectBegin(numFDs, readSet, writeSet, errorSet, timeout));

	// start selecting file descriptors

	for (fd = 0; fd < numFDs; fd++) {
		sync->set[fd].selected_events = 0;
		sync->set[fd].events = 0;

		// a read select also reports errors and disconnects (POSIX:
		// such FDs must be flagged readable so the caller notices)
		if (readSet && FD_ISSET(fd, readSet)) {
			sync->set[fd].selected_events = SELECT_FLAG(B_SELECT_READ)
				| SELECT_FLAG(B_SELECT_DISCONNECTED) | SELECT_FLAG(B_SELECT_ERROR);
		}
		if (writeSet && FD_ISSET(fd, writeSet)) {
			sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_WRITE)
				| SELECT_FLAG(B_SELECT_ERROR);
		}
		if (errorSet && FD_ISSET(fd, errorSet))
			sync->set[fd].selected_events |= SELECT_FLAG(B_SELECT_ERROR);

		if (sync->set[fd].selected_events != 0) {
			select_fd(fd, sync->set + fd, kernel);
				// array position is the same as the fd for select()
		}
	}

	// set new signal mask (pselect() semantics)
	sigset_t oldSigMask;
	if (sigMask != NULL) {
		sigprocmask(SIG_SETMASK, sigMask, &oldSigMask);
		if (!kernel) {
			// for userland callers the mask is restored by the syscall
			// return path via THREAD_FLAGS_OLD_SIGMASK
			Thread *thread = thread_get_current_thread();
			thread->old_sig_block_mask = oldSigMask;
			thread->flags |= THREAD_FLAGS_OLD_SIGMASK;
		}
	}

	// wait for something to happen
	status = acquire_sem_etc(sync->sem, 1,
		B_CAN_INTERRUPT | (timeout >= 0 ? B_ABSOLUTE_TIMEOUT : 0), timeout);

	// restore the old signal mask (kernel callers only -- see above)
	if (sigMask != NULL && kernel)
		sigprocmask(SIG_SETMASK, &oldSigMask, NULL);

	PRINT(("common_select(): acquire_sem_etc() returned: %lx\n", status));

	// deselect file descriptors

	for (fd = 0; fd < numFDs; fd++)
		deselect_fd(fd, sync->set + fd, kernel);

	PRINT(("common_select(): events deselected\n"));

	// collect the events that have happened in the meantime

	int count = 0;

	if (status == B_INTERRUPTED) {
		// We must not clear the sets in this case, as applications may
		// rely on the contents of them.
		put_select_sync(sync);
		T(SelectDone(numFDs, readSet, writeSet, errorSet, status));
		return B_INTERRUPTED;
	}

	// Clear sets to store the received events
	// (we can't use the macros, because we have variable sized arrays;
	// the other FD_xxx() macros are safe, though).
	fd_zero(readSet, numFDs);
	fd_zero(writeSet, numFDs);
	fd_zero(errorSet, numFDs);

	if (status == B_OK) {
		// each set bit counts individually, matching select()'s return
		for (count = 0, fd = 0;fd < numFDs; fd++) {
			if (readSet && sync->set[fd].events & (SELECT_FLAG(B_SELECT_READ)
					| SELECT_FLAG(B_SELECT_DISCONNECTED) | SELECT_FLAG(B_SELECT_ERROR))) {
				FD_SET(fd, readSet);
				count++;
			}
			if (writeSet
				&& sync->set[fd].events & (SELECT_FLAG(B_SELECT_WRITE)
					| SELECT_FLAG(B_SELECT_ERROR))) {
				FD_SET(fd, writeSet);
				count++;
			}
			if (errorSet
				&& sync->set[fd].events & SELECT_FLAG(B_SELECT_ERROR)) {
				FD_SET(fd, errorSet);
				count++;
			}
		}
	}

	// B_TIMED_OUT and B_WOULD_BLOCK are supposed to return 0

	put_select_sync(sync);

	T(SelectDone(numFDs, readSet, writeSet, errorSet, status));

	return count;
}
566
567
/*!	Backend for poll(): selects all valid FDs in \a fds, waits until an
	event arrives, the absolute \a timeout expires (negative means wait
	forever), or a signal interrupts, then reports the events via the
	revents fields.
	\return the number of pollfds with non-zero revents, B_INTERRUPTED,
		or a negative error from setup.
*/
static int
common_poll(struct pollfd *fds, nfds_t numFDs, bigtime_t timeout,
	const sigset_t *sigMask, bool kernel)
{
	// allocate sync object
	wait_for_objects_sync* sync;
	status_t status = create_select_sync(numFDs, sync);
	if (status != B_OK)
		return status;

	T(PollBegin(fds, numFDs, timeout));

	// start polling file descriptors (by selecting them)

	bool invalid = false;
	for (uint32 i = 0; i < numFDs; i++) {
		int fd = fds[i].fd;

		// initialize events masks
		// (POLLNVAL/POLLERR/POLLHUP are always reported, per POSIX)
		sync->set[i].selected_events = fds[i].events
			| POLLNVAL | POLLERR | POLLHUP;
		sync->set[i].events = 0;
		fds[i].revents = 0;

		// negative fds are ignored per POSIX; invalid ones get POLLNVAL
		if (fd >= 0 && select_fd(fd, sync->set + i, kernel) != B_OK) {
			sync->set[i].events = POLLNVAL;
			fds[i].revents = POLLNVAL;
				// indicates that the FD doesn't need to be deselected
			invalid = true;
		}
	}

	// set new signal mask (ppoll() semantics)
	sigset_t oldSigMask;
	if (sigMask != NULL) {
		sigprocmask(SIG_SETMASK, sigMask, &oldSigMask);
		if (!kernel) {
			// for userland callers the mask is restored by the syscall
			// return path via THREAD_FLAGS_OLD_SIGMASK
			Thread *thread = thread_get_current_thread();
			thread->old_sig_block_mask = oldSigMask;
			thread->flags |= THREAD_FLAGS_OLD_SIGMASK;
		}
	}

	// don't wait at all if at least one FD was invalid -- POLLNVAL has
	// to be reported immediately
	if (!invalid) {
		status = acquire_sem_etc(sync->sem, 1,
			B_CAN_INTERRUPT | (timeout >= 0 ? B_ABSOLUTE_TIMEOUT : 0), timeout);
	}

	// restore the old signal mask (kernel callers only -- see above)
	if (sigMask != NULL && kernel)
		sigprocmask(SIG_SETMASK, &oldSigMask, NULL);

	// deselect file descriptors

	for (uint32 i = 0; i < numFDs; i++) {
		if (fds[i].fd >= 0 && (fds[i].revents & POLLNVAL) == 0)
			deselect_fd(fds[i].fd, sync->set + i, kernel);
	}

	// collect the events that have happened in the meantime

	int count = 0;
	switch (status) {
		case B_OK:
			for (uint32 i = 0; i < numFDs; i++) {
				if (fds[i].fd < 0)
					continue;

				// POLLxxx flags and B_SELECT_xxx flags are compatible
				fds[i].revents = sync->set[i].events
					& sync->set[i].selected_events;
				if (fds[i].revents != 0)
					count++;
			}
			break;
		case B_INTERRUPTED:
			count = B_INTERRUPTED;
			break;
		default:
			// B_TIMED_OUT, and B_WOULD_BLOCK
			break;
	}

	put_select_sync(sync);

	T(PollDone(fds, numFDs, count));

	return count;
}
657
658
/*!	Backend for wait_for_objects(): selects the given kernel objects
	(semaphores, ports, threads, FDs -- dispatched by \a infos[i].type),
	waits for an event or \a timeout (interpreted according to \a flags),
	and reports the received events back through the infos' events
	fields.
	\return the number of infos with non-zero events, or a negative
		error/wait status.
*/
static ssize_t
common_wait_for_objects(object_wait_info* infos, int numInfos, uint32 flags,
	bigtime_t timeout, bool kernel)
{
	status_t status = B_OK;

	// allocate sync object
	wait_for_objects_sync* sync;
	status = create_select_sync(numInfos, sync);
	if (status != B_OK)
		return status;

	// start selecting objects

	bool invalid = false;
	for (int i = 0; i < numInfos; i++) {
		uint16 type = infos[i].type;
		int32 object = infos[i].object;

		// initialize events masks
		// (error conditions are always reported)
		sync->set[i].selected_events = infos[i].events
			| B_EVENT_INVALID | B_EVENT_ERROR | B_EVENT_DISCONNECTED;
		sync->set[i].events = 0;
		infos[i].events = 0;

		if (select_object(type, object, sync->set + i, kernel) != B_OK) {
			sync->set[i].events = B_EVENT_INVALID;
			infos[i].events = B_EVENT_INVALID;
				// indicates that the object doesn't need to be deselected
			invalid = true;
		}
	}

	// don't wait at all if at least one object was invalid --
	// B_EVENT_INVALID has to be reported immediately
	if (!invalid) {
		status = acquire_sem_etc(sync->sem, 1, B_CAN_INTERRUPT | flags,
			timeout);
	}

	// deselect objects

	for (int i = 0; i < numInfos; i++) {
		uint16 type = infos[i].type;

		if ((infos[i].events & B_EVENT_INVALID) == 0)
			deselect_object(type, infos[i].object, sync->set + i, kernel);
	}

	// collect the events that have happened in the meantime

	ssize_t count = 0;
	if (status == B_OK) {
		for (int i = 0; i < numInfos; i++) {
			infos[i].events = sync->set[i].events
				& sync->set[i].selected_events;
			if (infos[i].events != 0)
				count++;
		}
	} else {
		// B_INTERRUPTED, B_TIMED_OUT, and B_WOULD_BLOCK
		count = status;
	}

	put_select_sync(sync);

	return count;
}
725
726
727// #pragma mark - kernel private
728
729
730status_t
731notify_select_events(select_info* info, uint16 events)
732{
733	FUNCTION(("notify_select_events(%p (%p), 0x%x)\n", info, info->sync,
734		events));
735
736	if (info == NULL || info->sync == NULL)
737		return B_BAD_VALUE;
738
739	return info->sync->Notify(info, events);
740}
741
742
743void
744notify_select_events_list(select_info* list, uint16 events)
745{
746	struct select_info* info = list;
747	while (info != NULL) {
748		select_info* next = info->next;
749		notify_select_events(info, events);
750		info = next;
751	}
752}
753
754
755//	#pragma mark - public kernel API
756
757
758status_t
759notify_select_event(struct selectsync *sync, uint8 event)
760{
761	return notify_select_events((select_info*)sync, SELECT_FLAG(event));
762}
763
764
765//	#pragma mark - private kernel exported API
766
767
768static select_sync_pool_entry *
769find_select_sync_pool_entry(select_sync_pool *pool, selectsync *sync)
770{
771	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
772		 it.HasNext();) {
773		select_sync_pool_entry *entry = it.Next();
774		if (entry->sync == sync)
775			return entry;
776	}
777
778	return NULL;
779}
780
781
782static status_t
783add_select_sync_pool_entry(select_sync_pool *pool, selectsync *sync,
784	uint8 event)
785{
786	// check, whether the entry does already exist
787	select_sync_pool_entry *entry = find_select_sync_pool_entry(pool, sync);
788	if (!entry) {
789		entry = new (std::nothrow) select_sync_pool_entry;
790		if (!entry)
791			return B_NO_MEMORY;
792
793		entry->sync = sync;
794		entry->events = 0;
795
796		pool->entries.Add(entry);
797	}
798
799	entry->events |= SELECT_FLAG(event);
800
801	return B_OK;
802}
803
804
805status_t
806add_select_sync_pool_entry(select_sync_pool **_pool, selectsync *sync,
807	uint8 event)
808{
809	// create the pool, if necessary
810	select_sync_pool *pool = *_pool;
811	if (!pool) {
812		pool = new (std::nothrow) select_sync_pool;
813		if (!pool)
814			return B_NO_MEMORY;
815
816		*_pool = pool;
817	}
818
819	// add the entry
820	status_t error = add_select_sync_pool_entry(pool, sync, event);
821
822	// cleanup
823	if (pool->entries.IsEmpty()) {
824		delete pool;
825		*_pool = NULL;
826	}
827
828	return error;
829}
830
831
832status_t
833remove_select_sync_pool_entry(select_sync_pool **_pool, selectsync *sync,
834	uint8 event)
835{
836	select_sync_pool *pool = *_pool;
837	if (!pool)
838		return B_ENTRY_NOT_FOUND;
839
840	// clear the event flag of the concerned entries
841	bool found = false;
842	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
843		 it.HasNext();) {
844		select_sync_pool_entry *entry = it.Next();
845		if (entry->sync == sync) {
846			found = true;
847			entry->events &= ~SELECT_FLAG(event);
848
849			// remove the entry, if no longer needed
850			if (entry->events == 0) {
851				it.Remove();
852				delete entry;
853			}
854		}
855	}
856
857	if (!found)
858		return B_ENTRY_NOT_FOUND;
859
860	// delete the pool, if no longer needed
861	if (pool->entries.IsEmpty()) {
862		delete pool;
863		*_pool = NULL;
864	}
865
866	return B_OK;
867}
868
869
870void
871delete_select_sync_pool(select_sync_pool *pool)
872{
873	if (!pool)
874		return;
875
876	while (select_sync_pool_entry *entry = pool->entries.Head()) {
877		pool->entries.Remove(entry);
878		delete entry;
879	}
880
881	delete pool;
882}
883
884
885void
886notify_select_event_pool(select_sync_pool *pool, uint8 event)
887{
888	if (!pool)
889		return;
890
891	FUNCTION(("notify_select_event_pool(%p, %u)\n", pool, event));
892
893	for (SelectSyncPoolEntryList::Iterator it = pool->entries.GetIterator();
894		 it.HasNext();) {
895		select_sync_pool_entry *entry = it.Next();
896		if (entry->events & SELECT_FLAG(event))
897			notify_select_event(entry->sync, event);
898	}
899}
900
901
902//	#pragma mark - Kernel POSIX layer
903
904
905ssize_t
906_kern_select(int numFDs, fd_set *readSet, fd_set *writeSet, fd_set *errorSet,
907	bigtime_t timeout, const sigset_t *sigMask)
908{
909	if (timeout >= 0)
910		timeout += system_time();
911
912	return common_select(numFDs, readSet, writeSet, errorSet, timeout,
913		sigMask, true);
914}
915
916
917ssize_t
918_kern_poll(struct pollfd *fds, int numFDs, bigtime_t timeout,
919	const sigset_t *sigMask)
920{
921	if (timeout >= 0)
922		timeout += system_time();
923
924	return common_poll(fds, numFDs, timeout, sigMask, true);
925}
926
927
928ssize_t
929_kern_wait_for_objects(object_wait_info* infos, int numInfos, uint32 flags,
930	bigtime_t timeout)
931{
932	return common_wait_for_objects(infos, numInfos, flags, timeout, true);
933}
934
935
936//	#pragma mark - User syscalls
937
938
939static bool
940check_max_fds(int numFDs)
941{
942	if (numFDs <= 0)
943		return true;
944
945	struct io_context *context = get_current_io_context(false);
946	MutexLocker(&context->io_mutex);
947	return (size_t)numFDs <= context->table_size;
948}
949
950
/*!	select() syscall: validates and copies the userland sets/sigmask into
	kernel buffers, runs common_select(), and copies the resulting sets
	back on success.
*/
ssize_t
_user_select(int numFDs, fd_set *userReadSet, fd_set *userWriteSet,
	fd_set *userErrorSet, bigtime_t timeout, const sigset_t *userSigMask)
{
	// size of one variable-length fd_set for numFDs descriptors
	// (computed before the numFDs check below, but only used after it)
	uint32 bytes = _howmany(numFDs, NFDBITS) * sizeof(fd_mask);
	int result;

	// convert the relative timeout to an absolute one
	if (timeout >= 0) {
		timeout += system_time();
		// deal with overflow
		if (timeout < 0)
			timeout = B_INFINITE_TIMEOUT;
	}

	if (numFDs < 0 || !check_max_fds(numFDs))
		return B_BAD_VALUE;

	if ((userReadSet != NULL && !IS_USER_ADDRESS(userReadSet))
		|| (userWriteSet != NULL && !IS_USER_ADDRESS(userWriteSet))
		|| (userErrorSet != NULL && !IS_USER_ADDRESS(userErrorSet))
		|| (userSigMask != NULL && !IS_USER_ADDRESS(userSigMask)))
		return B_BAD_ADDRESS;

	// copy parameters

	// one shared buffer holds all sets that were actually passed
	BStackOrHeapArray<char, 128> sets(bytes * (
		((userReadSet != NULL) ? 1 : 0) +
		((userWriteSet != NULL) ? 1 : 0) +
		((userErrorSet != NULL) ? 1 : 0)));
	if (!sets.IsValid())
		return B_NO_MEMORY;

	char *nextSet = &sets[0];
	fd_set *readSet = NULL, *writeSet = NULL, *errorSet = NULL;

	if (userReadSet != NULL) {
		readSet = (fd_set *)nextSet;
		nextSet += bytes;

		if (user_memcpy(readSet, userReadSet, bytes) != B_OK)
			return B_BAD_ADDRESS;
	}

	if (userWriteSet != NULL) {
		writeSet = (fd_set *)nextSet;
		nextSet += bytes;

		if (user_memcpy(writeSet, userWriteSet, bytes) != B_OK)
			return B_BAD_ADDRESS;
	}

	if (userErrorSet != NULL) {
		errorSet = (fd_set *)nextSet;

		if (user_memcpy(errorSet, userErrorSet, bytes) != B_OK)
			return B_BAD_ADDRESS;
	}

	sigset_t sigMask;
	if (userSigMask != NULL
			&& user_memcpy(&sigMask, userSigMask, sizeof(sigMask)) != B_OK) {
		return B_BAD_ADDRESS;
	}

	result = common_select(numFDs, readSet, writeSet, errorSet, timeout,
		userSigMask ? &sigMask : NULL, false);

	// copy back results

	// sets are also copied back on B_INTERRUPTED (>= B_OK is false then),
	// i.e. only on success -- the interrupted case leaves them untouched
	if (result >= B_OK
		&& ((readSet != NULL
				&& user_memcpy(userReadSet, readSet, bytes) < B_OK)
			|| (writeSet != NULL
				&& user_memcpy(userWriteSet, writeSet, bytes) < B_OK)
			|| (errorSet != NULL
				&& user_memcpy(userErrorSet, errorSet, bytes) < B_OK))) {
		result = B_BAD_ADDRESS;
	}

	return result;
}
1032
1033
/*!	poll() syscall: copies the userland pollfd array and sigmask into
	kernel buffers, runs common_poll(), and copies the revents back.
*/
ssize_t
_user_poll(struct pollfd *userfds, int numFDs, bigtime_t timeout,
	const sigset_t *userSigMask)
{
	// convert the relative timeout to an absolute one
	if (timeout >= 0) {
		timeout += system_time();
		// deal with overflow
		if (timeout < 0)
			timeout = B_INFINITE_TIMEOUT;
	}

	if (numFDs < 0 || !check_max_fds(numFDs))
		return B_BAD_VALUE;

	BStackOrHeapArray<struct pollfd, 16> fds(numFDs);
	if (!fds.IsValid())
		return B_NO_MEMORY;

	const size_t bytes = numFDs * sizeof(struct pollfd);
	if (numFDs != 0) {
		if (userfds == NULL || !IS_USER_ADDRESS(userfds))
			return B_BAD_ADDRESS;

		if (user_memcpy(fds, userfds, bytes) < B_OK)
			return B_BAD_ADDRESS;
	}

	sigset_t sigMask;
	if (userSigMask != NULL
		&& (!IS_USER_ADDRESS(userSigMask)
			|| user_memcpy(&sigMask, userSigMask, sizeof(sigMask)) < B_OK)) {
		return B_BAD_ADDRESS;
	}

	status_t result = common_poll(fds, numFDs, timeout,
		userSigMask != NULL ? &sigMask : NULL, false);

	// copy back results
	// (unconditionally -- revents must reach userland even on
	// B_INTERRUPTED; a copy failure only overrides a success result)
	if (numFDs > 0 && user_memcpy(userfds, fds, bytes) != 0) {
		if (result >= 0)
			result = B_BAD_ADDRESS;
	}

	return result;
}
1079
1080
/*!	wait_for_objects() syscall: copies the userland object_wait_info
	array into the kernel, runs common_wait_for_objects(), copies the
	received events back, and participates in syscall restarting for
	timeout-related errors.
*/
ssize_t
_user_wait_for_objects(object_wait_info* userInfos, int numInfos, uint32 flags,
	bigtime_t timeout)
{
	// may adjust flags/timeout when this is a restarted syscall
	syscall_restart_handle_timeout_pre(flags, timeout);

	// sanity-check numInfos; the subtraction discounts the waitable
	// non-FD objects (sems, ports, threads) from the FD-table bound
	if (numInfos < 0 || !check_max_fds(numInfos - sem_max_sems()
			- port_max_ports() - thread_max_threads())) {
		return B_BAD_VALUE;
	}

	if (numInfos == 0) {
		// special case: no infos
		ssize_t result = common_wait_for_objects(NULL, 0, flags, timeout,
			false);
		return result < 0
			? syscall_restart_handle_timeout_post(result, timeout) : result;
	}

	if (userInfos == NULL || !IS_USER_ADDRESS(userInfos))
		return B_BAD_ADDRESS;

	BStackOrHeapArray<object_wait_info, 16> infos(numInfos);
	if (!infos.IsValid())
		return B_NO_MEMORY;
	const int bytes = sizeof(object_wait_info) * numInfos;

	if (user_memcpy(infos, userInfos, bytes) != B_OK)
		return B_BAD_ADDRESS;

	ssize_t result = common_wait_for_objects(infos, numInfos, flags, timeout, false);

	if (result >= 0 && user_memcpy(userInfos, infos, bytes) != B_OK) {
		result = B_BAD_ADDRESS;
	} else {
		// arrange for a restart if the wait failed with a timeout-class
		// error and the syscall is restartable
		syscall_restart_handle_timeout_post(result, timeout);
	}

	return result;
}
1121