1/*
2 * Copyright 2023, Haiku, Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 */
5#include <sys/event.h>
6
7#include <StackOrHeapArray.h>
8
9#include <libroot/errno_private.h>
10#include <syscalls.h>
11#include <event_queue_defs.h>
12
13
14extern "C" int
15kqueue()
16{
17	int fd = _kern_event_queue_create(0);
18	if (fd < 0) {
19		__set_errno(fd);
20		return -1;
21	}
22	return fd;
23}
24
25
26static short
27filter_from_info(const event_wait_info& info)
28{
29	switch (info.type) {
30		case B_OBJECT_TYPE_FD:
31			if (info.events > 0 && (info.events & B_EVENT_WRITE) != 0)
32				return EVFILT_WRITE;
33			return EVFILT_READ;
34
35		case B_OBJECT_TYPE_THREAD:
36			return EVFILT_PROC;
37	}
38
39	return 0;
40}
41
42
43extern "C" int
44kevent(int kq,
45	const struct kevent *changelist, int nchanges,
46	struct kevent *eventlist, int nevents,
47	const struct timespec *tspec)
48{
49	BStackOrHeapArray<event_wait_info, 16> waitInfos(max_c(nchanges, nevents));
50
51	event_wait_info* waitInfo = waitInfos;
52	int changedInfos = 0;
53
54	for (int i = 0; i < nchanges; i++) {
55		waitInfo->object = changelist[i].ident;
56		waitInfo->events = 0;
57		waitInfo->user_data = changelist[i].udata;
58
59		int32 events = 0, behavior = 0;
60		switch (changelist[i].filter) {
61			case EVFILT_READ:
62				waitInfo->type = B_OBJECT_TYPE_FD;
63				events = B_EVENT_READ;
64				break;
65
66			case EVFILT_WRITE:
67				waitInfo->type = B_OBJECT_TYPE_FD;
68				events = B_EVENT_WRITE;
69				break;
70
71			case EVFILT_PROC:
72				waitInfo->type = B_OBJECT_TYPE_THREAD;
73				if ((changelist[i].fflags & NOTE_EXIT) != 0)
74					events |= B_EVENT_INVALID;
75				break;
76
77			default:
78				return EINVAL;
79		}
80
81		if ((changelist[i].flags & EV_ONESHOT) != 0)
82			behavior |= B_EVENT_ONE_SHOT;
83		if ((changelist[i].flags & EV_CLEAR) == 0)
84			behavior |= B_EVENT_LEVEL_TRIGGERED;
85
86		if (changelist[i].filter == EVFILT_READ || changelist[i].filter == EVFILT_WRITE) {
87			// kqueue treats the same file descriptor with both READ and WRITE filters
88			// as two separate listeners. Haiku, however, treats it as one.
89			// We rectify this here by carefully combining the two.
90
91			// We can't support ONESHOT for descriptors due to the separation.
92			if ((changelist[i].flags & EV_ONESHOT) != 0) {
93				__set_errno(EOPNOTSUPP);
94				return -1;
95			}
96
97			const short otherFilter = (changelist[i].filter == EVFILT_READ)
98				? EVFILT_WRITE : EVFILT_READ;
99			const int32 otherEvents = (otherFilter == EVFILT_READ)
100				? B_EVENT_READ : B_EVENT_WRITE;
101
102			// First, check if the other filter is specified in this changelist.
103			int j;
104			for (j = 0; j < nchanges; j++) {
105				if (changelist[j].ident != changelist[i].ident)
106					continue;
107				if (changelist[j].filter != otherFilter)
108					continue;
109
110				// We've found it.
111				break;
112			}
113			if (j < nchanges) {
114				// It is in the list.
115				if (j < i) {
116					// And it's already been taken care of.
117					continue;
118				}
119
120				// Fold it into this one.
121				if ((changelist[j].flags & EV_ADD) != 0) {
122					waitInfo->events |= otherEvents;
123				} else if ((changelist[j].flags & EV_DELETE) != 0) {
124					waitInfo->events &= ~otherEvents;
125				}
126			} else {
127				// It is not in the list. See if it's already set.
128				event_wait_info info;
129				info.type = B_OBJECT_TYPE_FD;
130				info.object = waitInfo->object;
131				info.events = -1;
132
133				status_t status = _kern_event_queue_select(kq, &info, 1);
134				if (status == B_OK)
135					waitInfo->events |= (info.events & otherEvents);
136			}
137		}
138
139		if ((changelist[i].flags & EV_ADD) != 0) {
140			waitInfo->events |= events;
141		} else if ((changelist[i].flags & EV_DELETE) != 0) {
142			waitInfo->events &= ~events;
143		}
144
145		if (waitInfo->events != 0)
146			waitInfo->events |= behavior;
147
148		changedInfos++;
149		waitInfo++;
150	}
151	if (changedInfos != 0) {
152		status_t status = _kern_event_queue_select(kq, waitInfos, changedInfos);
153		if (status != B_OK) {
154			if (nchanges == 1 && nevents == 0) {
155				// Special case: return the lone error directly.
156				__set_errno(waitInfos[0].events);
157				return -1;
158			}
159
160			// Report problems as error events.
161			int errors = 0;
162			for (int i = 0; i < changedInfos; i++) {
163				if (waitInfos[i].events > 0)
164					continue;
165				if (nevents == 0)
166					break;
167
168				short filter = filter_from_info(waitInfos[i]);
169				int64_t data = waitInfos[i].events;
170				EV_SET(eventlist, waitInfos[i].object,
171					filter, EV_ERROR, 0, data, waitInfos[i].user_data);
172				eventlist++;
173				nevents--;
174				errors++;
175			}
176			if (nevents == 0 || errors == 0) {
177				__set_errno(status);
178				return -1;
179			}
180		}
181	}
182
183	if (nevents != 0) {
184		bigtime_t timeout = 0;
185		uint32 waitFlags = 0;
186		if (tspec != NULL) {
187			timeout = (tspec->tv_sec * 1000000LL) + (tspec->tv_nsec / 1000LL);
188			waitFlags |= B_RELATIVE_TIMEOUT;
189		}
190
191		ssize_t events = _kern_event_queue_wait(kq, waitInfos,
192			max_c(1, nevents / 2), waitFlags, timeout);
193		if (events > 0) {
194			int returnedEvents = 0;
195			for (ssize_t i = 0; i < events; i++) {
196				unsigned short flags = 0;
197				unsigned int fflags = 0;
198				int64_t data = 0;
199
200				if (waitInfos[i].events < 0) {
201					flags |= EV_ERROR;
202					data = waitInfos[i].events;
203				} else if ((waitInfos[i].events & B_EVENT_DISCONNECTED) != 0) {
204					flags |= EV_EOF;
205				} else if ((waitInfos[i].events & B_EVENT_INVALID) != 0) {
206					switch (waitInfos[i].type) {
207						case B_OBJECT_TYPE_FD:
208							flags |= EV_EOF;
209							break;
210
211						case B_OBJECT_TYPE_THREAD: {
212							fflags |= NOTE_EXIT;
213
214							status_t returnValue = -1;
215							status_t status = wait_for_thread(waitInfos[i].object, &returnValue);
216							if (status == B_OK)
217								data = returnValue;
218							else
219								data = -1;
220							break;
221						}
222					}
223				} else if ((waitInfos[i].events & B_EVENT_ERROR) != 0) {
224					flags |= EV_ERROR;
225					data = EINVAL;
226				}
227
228				short filter = filter_from_info(waitInfos[i]);
229				if (waitInfos[i].type == B_OBJECT_TYPE_FD && (flags & (EV_ERROR | EV_EOF)) == 0) {
230					// Do we have both a read and a write event?
231					if ((waitInfos[i].events & (B_EVENT_READ | B_EVENT_WRITE))
232							== (B_EVENT_READ | B_EVENT_WRITE)) {
233						// We do. Report both, if we can.
234						if (nevents > 1) {
235							EV_SET(eventlist, waitInfos[i].object,
236								EVFILT_WRITE, flags, fflags, data, waitInfos[i].user_data);
237							eventlist++;
238							returnedEvents++;
239							nevents--;
240						}
241						filter = EVFILT_READ;
242					}
243				}
244
245				EV_SET(eventlist, waitInfos[i].object,
246					filter, flags, fflags, data, waitInfos[i].user_data);
247				eventlist++;
248				returnedEvents++;
249				nevents--;
250			}
251			return returnedEvents;
252		} else if (events < 0) {
253			if (events == B_WOULD_BLOCK || events == B_TIMED_OUT)
254				return 0;
255
256			__set_errno(events);
257			return -1;
258		}
259		return 0;
260	}
261
262	return 0;
263}
264