1283441Sdchagin/*- 2283441Sdchagin * Copyright (c) 2007 Roman Divacky 3283441Sdchagin * Copyright (c) 2014 Dmitry Chagin 4283441Sdchagin * All rights reserved. 5283441Sdchagin * 6283441Sdchagin * Redistribution and use in source and binary forms, with or without 7283441Sdchagin * modification, are permitted provided that the following conditions 8283441Sdchagin * are met: 9283441Sdchagin * 1. Redistributions of source code must retain the above copyright 10283441Sdchagin * notice, this list of conditions and the following disclaimer. 11283441Sdchagin * 2. Redistributions in binary form must reproduce the above copyright 12283441Sdchagin * notice, this list of conditions and the following disclaimer in the 13283441Sdchagin * documentation and/or other materials provided with the distribution. 14283441Sdchagin * 15283441Sdchagin * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16283441Sdchagin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17283441Sdchagin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18283441Sdchagin * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19283441Sdchagin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20283441Sdchagin * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21283441Sdchagin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22283441Sdchagin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23283441Sdchagin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24283441Sdchagin * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25283441Sdchagin * SUCH DAMAGE. 26283441Sdchagin */ 27283441Sdchagin 28283441Sdchagin#include <sys/cdefs.h> 29283441Sdchagin__FBSDID("$FreeBSD: releng/10.3/sys/compat/linux/linux_event.c 293606 2016-01-09 18:23:34Z dchagin $"); 30283441Sdchagin 31283441Sdchagin#include "opt_compat.h" 32283441Sdchagin 33283441Sdchagin#include <sys/param.h> 34283441Sdchagin#include <sys/systm.h> 35283441Sdchagin#include <sys/imgact.h> 36283441Sdchagin#include <sys/kernel.h> 37283441Sdchagin#include <sys/limits.h> 38283441Sdchagin#include <sys/lock.h> 39283441Sdchagin#include <sys/mutex.h> 40283441Sdchagin#include <sys/capability.h> 41283441Sdchagin#include <sys/types.h> 42283441Sdchagin#include <sys/file.h> 43283441Sdchagin#include <sys/filedesc.h> 44283441Sdchagin#include <sys/errno.h> 45283441Sdchagin#include <sys/event.h> 46293549Sdchagin#include <sys/poll.h> 47283441Sdchagin#include <sys/proc.h> 48293549Sdchagin#include <sys/selinfo.h> 49283441Sdchagin#include <sys/sx.h> 50283441Sdchagin#include <sys/syscallsubr.h> 51283441Sdchagin#include <sys/timespec.h> 52283441Sdchagin 53283441Sdchagin#ifdef COMPAT_LINUX32 54283441Sdchagin#include <machine/../linux32/linux.h> 55283441Sdchagin#include <machine/../linux32/linux32_proto.h> 56283441Sdchagin#else 57283441Sdchagin#include <machine/../linux/linux.h> 58283441Sdchagin#include <machine/../linux/linux_proto.h> 59283441Sdchagin#endif 60283441Sdchagin 61283441Sdchagin#include <compat/linux/linux_emul.h> 62283441Sdchagin#include <compat/linux/linux_event.h> 63283441Sdchagin#include <compat/linux/linux_file.h> 64283441Sdchagin#include <compat/linux/linux_util.h> 65283441Sdchagin 66283441Sdchagin/* 67283441Sdchagin * epoll defines 'struct epoll_event' with the field 'data' as 64 bits 68283441Sdchagin * on all architectures. But on 32 bit architectures BSD 'struct kevent' only 69283441Sdchagin * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied 70283441Sdchagin * data verbatuim. Therefore we allocate 64-bit memory block to pass 71283441Sdchagin * user supplied data for every file descriptor. 72283441Sdchagin */ 73283441Sdchagin 74283441Sdchagintypedef uint64_t epoll_udata_t; 75283441Sdchagin 76283441Sdchaginstruct epoll_emuldata { 77283441Sdchagin uint32_t fdc; /* epoll udata max index */ 78283441Sdchagin epoll_udata_t udata[1]; /* epoll user data vector */ 79283441Sdchagin}; 80283441Sdchagin 81283441Sdchagin#define EPOLL_DEF_SZ 16 82283441Sdchagin#define EPOLL_SIZE(fdn) \ 83283441Sdchagin (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t)) 84283441Sdchagin 85283441Sdchaginstruct epoll_event { 86283441Sdchagin uint32_t events; 87283441Sdchagin epoll_udata_t data; 88283441Sdchagin} 89283441Sdchagin#if defined(__amd64__) 90283441Sdchagin__attribute__((packed)) 91283441Sdchagin#endif 92283441Sdchagin; 93283441Sdchagin 94283441Sdchagin#define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 95283441Sdchagin 96283441Sdchaginstatic void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata); 97283441Sdchaginstatic int epoll_to_kevent(struct thread *td, struct file *epfp, 98283441Sdchagin int fd, struct epoll_event *l_event, int *kev_flags, 99283441Sdchagin struct kevent *kevent, int *nkevents); 100283441Sdchaginstatic void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 101283441Sdchaginstatic int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 102283441Sdchaginstatic int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 103283441Sdchaginstatic int epoll_delete_event(struct thread *td, struct file *epfp, 104283441Sdchagin int fd, int filter); 105283441Sdchaginstatic int epoll_delete_all_events(struct thread *td, struct file *epfp, 106283441Sdchagin int fd); 107283441Sdchagin 108283441Sdchaginstruct epoll_copyin_args { 109283441Sdchagin struct kevent *changelist; 110283441Sdchagin}; 111283441Sdchagin 112283441Sdchaginstruct epoll_copyout_args { 113283441Sdchagin struct epoll_event *leventlist; 114283441Sdchagin struct proc *p; 115283441Sdchagin uint32_t count; 116283441Sdchagin int error; 117283441Sdchagin}; 118283441Sdchagin 119293549Sdchagin/* eventfd */ 120293549Sdchagintypedef uint64_t eventfd_t; 121283441Sdchagin 122293549Sdchaginstatic fo_rdwr_t eventfd_read; 123293549Sdchaginstatic fo_rdwr_t eventfd_write; 124293549Sdchaginstatic fo_truncate_t eventfd_truncate; 125293549Sdchaginstatic fo_ioctl_t eventfd_ioctl; 126293549Sdchaginstatic fo_poll_t eventfd_poll; 127293549Sdchaginstatic fo_kqfilter_t eventfd_kqfilter; 128293549Sdchaginstatic fo_stat_t eventfd_stat; 129293549Sdchaginstatic fo_close_t eventfd_close; 130293549Sdchagin 131293549Sdchaginstatic struct fileops eventfdops = { 132293549Sdchagin .fo_read = eventfd_read, 133293549Sdchagin .fo_write = eventfd_write, 134293549Sdchagin .fo_truncate = eventfd_truncate, 135293549Sdchagin .fo_ioctl = eventfd_ioctl, 136293549Sdchagin .fo_poll = eventfd_poll, 137293549Sdchagin .fo_kqfilter = eventfd_kqfilter, 138293549Sdchagin .fo_stat = eventfd_stat, 139293549Sdchagin .fo_close = eventfd_close, 140293549Sdchagin .fo_chmod = invfo_chmod, 141293549Sdchagin .fo_chown = invfo_chown, 142293549Sdchagin .fo_sendfile = invfo_sendfile, 143293549Sdchagin .fo_flags = DFLAG_PASSABLE 144293549Sdchagin}; 145293549Sdchagin 146293549Sdchaginstatic void filt_eventfddetach(struct knote *kn); 147293549Sdchaginstatic int filt_eventfdread(struct knote *kn, long hint); 148293549Sdchaginstatic int filt_eventfdwrite(struct knote *kn, long hint); 149293549Sdchagin 150293549Sdchaginstatic struct filterops eventfd_rfiltops = { 151293549Sdchagin .f_isfd = 1, 152293549Sdchagin .f_detach = filt_eventfddetach, 153293549Sdchagin .f_event = filt_eventfdread 154293549Sdchagin}; 155293549Sdchaginstatic struct filterops eventfd_wfiltops = { 156293549Sdchagin .f_isfd = 1, 157293549Sdchagin .f_detach = filt_eventfddetach, 158293549Sdchagin .f_event = filt_eventfdwrite 159293549Sdchagin}; 160293549Sdchagin 161293549Sdchaginstruct eventfd { 162293549Sdchagin eventfd_t efd_count; 163293549Sdchagin uint32_t efd_flags; 164293549Sdchagin struct selinfo efd_sel; 165293549Sdchagin struct mtx efd_lock; 166293549Sdchagin}; 167293549Sdchagin 168293549Sdchaginstatic int eventfd_create(struct thread *td, uint32_t initval, int flags); 169293549Sdchagin 170293549Sdchagin 171283441Sdchaginstatic void 172283441Sdchaginepoll_fd_install(struct thread *td, int fd, epoll_udata_t udata) 173283441Sdchagin{ 174283441Sdchagin struct linux_pemuldata *pem; 175283441Sdchagin struct epoll_emuldata *emd; 176283441Sdchagin struct proc *p; 177283441Sdchagin 178283441Sdchagin p = td->td_proc; 179283441Sdchagin 180283441Sdchagin pem = pem_find(p); 181283441Sdchagin KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 182283441Sdchagin 183283441Sdchagin LINUX_PEM_XLOCK(pem); 184283441Sdchagin if (pem->epoll == NULL) { 185283441Sdchagin emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 186283441Sdchagin emd->fdc = fd; 187283441Sdchagin pem->epoll = emd; 188283441Sdchagin } else { 189283441Sdchagin emd = pem->epoll; 190283441Sdchagin if (fd > emd->fdc) { 191283441Sdchagin emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 192283441Sdchagin emd->fdc = fd; 193283441Sdchagin pem->epoll = emd; 194283441Sdchagin } 195283441Sdchagin } 196283441Sdchagin emd->udata[fd] = udata; 197283441Sdchagin LINUX_PEM_XUNLOCK(pem); 198283441Sdchagin} 199283441Sdchagin 200283441Sdchaginstatic int 201283441Sdchaginepoll_create_common(struct thread *td, int flags) 202283441Sdchagin{ 203283441Sdchagin int error; 204283441Sdchagin 205283441Sdchagin error = kern_kqueue(td, flags); 206283441Sdchagin if (error) 207283441Sdchagin return (error); 208283441Sdchagin 209283441Sdchagin epoll_fd_install(td, EPOLL_DEF_SZ, 0); 210283441Sdchagin 211283441Sdchagin return (0); 212283441Sdchagin} 213283441Sdchagin 214283441Sdchaginint 215283441Sdchaginlinux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 216283441Sdchagin{ 217283441Sdchagin 218283441Sdchagin /* 219283441Sdchagin * args->size is unused. Linux just tests it 220283441Sdchagin * and then forgets it as well. 221283441Sdchagin */ 222283441Sdchagin if (args->size <= 0) 223283441Sdchagin return (EINVAL); 224283441Sdchagin 225283441Sdchagin return (epoll_create_common(td, 0)); 226283441Sdchagin} 227283441Sdchagin 228283441Sdchaginint 229283441Sdchaginlinux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 230283441Sdchagin{ 231283441Sdchagin int flags; 232283441Sdchagin 233283441Sdchagin if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 234283441Sdchagin return (EINVAL); 235283441Sdchagin 236283441Sdchagin flags = 0; 237283441Sdchagin if ((args->flags & LINUX_O_CLOEXEC) != 0) 238283441Sdchagin flags |= O_CLOEXEC; 239283441Sdchagin 240283441Sdchagin return (epoll_create_common(td, flags)); 241283441Sdchagin} 242283441Sdchagin 243283441Sdchagin/* Structure converting function from epoll to kevent. */ 244283441Sdchaginstatic int 245283441Sdchaginepoll_to_kevent(struct thread *td, struct file *epfp, 246283441Sdchagin int fd, struct epoll_event *l_event, int *kev_flags, 247283441Sdchagin struct kevent *kevent, int *nkevents) 248283441Sdchagin{ 249283441Sdchagin uint32_t levents = l_event->events; 250283441Sdchagin struct linux_pemuldata *pem; 251283441Sdchagin struct proc *p; 252283441Sdchagin 253283441Sdchagin /* flags related to how event is registered */ 254283441Sdchagin if ((levents & LINUX_EPOLLONESHOT) != 0) 255283441Sdchagin *kev_flags |= EV_ONESHOT; 256283441Sdchagin if ((levents & LINUX_EPOLLET) != 0) 257283441Sdchagin *kev_flags |= EV_CLEAR; 258293571Sdchagin if ((levents & LINUX_EPOLLERR) != 0) 259293571Sdchagin *kev_flags |= EV_ERROR; 260293606Sdchagin if ((levents & LINUX_EPOLLRDHUP) != 0) 261293606Sdchagin *kev_flags |= EV_EOF; 262283441Sdchagin 263283441Sdchagin /* flags related to what event is registered */ 264283441Sdchagin if ((levents & LINUX_EPOLL_EVRD) != 0) { 265283441Sdchagin EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0); 266283441Sdchagin ++(*nkevents); 267283441Sdchagin } 268283441Sdchagin if ((levents & LINUX_EPOLL_EVWR) != 0) { 269283441Sdchagin EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0); 270283441Sdchagin ++(*nkevents); 271283441Sdchagin } 272283441Sdchagin 273283441Sdchagin if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 274283441Sdchagin p = td->td_proc; 275283441Sdchagin 276283441Sdchagin pem = pem_find(p); 277283441Sdchagin KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 278283441Sdchagin KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n")); 279283441Sdchagin 280283441Sdchagin LINUX_PEM_XLOCK(pem); 281283441Sdchagin if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 282283441Sdchagin pem->flags |= LINUX_XUNSUP_EPOLL; 283283441Sdchagin LINUX_PEM_XUNLOCK(pem); 284283441Sdchagin linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n", 285283441Sdchagin levents); 286283441Sdchagin } else 287283441Sdchagin LINUX_PEM_XUNLOCK(pem); 288283441Sdchagin return (EINVAL); 289283441Sdchagin } 290283441Sdchagin 291283441Sdchagin return (0); 292283441Sdchagin} 293283441Sdchagin 294283441Sdchagin/* 295283441Sdchagin * Structure converting function from kevent to epoll. In a case 296283441Sdchagin * this is called on error in registration we store the error in 297283441Sdchagin * event->data and pick it up later in linux_epoll_ctl(). 298283441Sdchagin */ 299283441Sdchaginstatic void 300283441Sdchaginkevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 301283441Sdchagin{ 302283441Sdchagin 303293571Sdchagin if ((kevent->flags & EV_ERROR) != 0) { 304293571Sdchagin l_event->events = LINUX_EPOLLERR; 305283441Sdchagin return; 306293571Sdchagin } 307283441Sdchagin 308283441Sdchagin switch (kevent->filter) { 309283441Sdchagin case EVFILT_READ: 310283441Sdchagin l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI; 311293606Sdchagin if ((kevent->flags & EV_EOF) != 0) 312293606Sdchagin l_event->events |= LINUX_EPOLLRDHUP; 313283441Sdchagin break; 314283441Sdchagin case EVFILT_WRITE: 315283441Sdchagin l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM; 316283441Sdchagin break; 317283441Sdchagin } 318283441Sdchagin} 319283441Sdchagin 320283441Sdchagin/* 321283441Sdchagin * Copyout callback used by kevent. This converts kevent 322283441Sdchagin * events to epoll events and copies them back to the 323283441Sdchagin * userspace. This is also called on error on registering 324283441Sdchagin * of the filter. 325283441Sdchagin */ 326283441Sdchaginstatic int 327283441Sdchaginepoll_kev_copyout(void *arg, struct kevent *kevp, int count) 328283441Sdchagin{ 329283441Sdchagin struct epoll_copyout_args *args; 330283441Sdchagin struct linux_pemuldata *pem; 331283441Sdchagin struct epoll_emuldata *emd; 332283441Sdchagin struct epoll_event *eep; 333283441Sdchagin int error, fd, i; 334283441Sdchagin 335283441Sdchagin args = (struct epoll_copyout_args*) arg; 336283441Sdchagin eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 337283441Sdchagin 338283441Sdchagin pem = pem_find(args->p); 339283441Sdchagin KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 340283441Sdchagin LINUX_PEM_SLOCK(pem); 341283441Sdchagin emd = pem->epoll; 342283441Sdchagin KASSERT(emd != NULL, ("epoll proc epolldata not found.\n")); 343283441Sdchagin 344283441Sdchagin for (i = 0; i < count; i++) { 345283441Sdchagin kevent_to_epoll(&kevp[i], &eep[i]); 346283441Sdchagin 347283441Sdchagin fd = kevp[i].ident; 348283441Sdchagin KASSERT(fd <= emd->fdc, ("epoll user data vector" 349283441Sdchagin " is too small.\n")); 350283441Sdchagin eep[i].data = emd->udata[fd]; 351283441Sdchagin } 352283441Sdchagin LINUX_PEM_SUNLOCK(pem); 353283441Sdchagin 354283441Sdchagin error = copyout(eep, args->leventlist, count * sizeof(*eep)); 355283441Sdchagin if (error == 0) { 356283441Sdchagin args->leventlist += count; 357283441Sdchagin args->count += count; 358283441Sdchagin } else if (args->error == 0) 359283441Sdchagin args->error = error; 360283441Sdchagin 361283441Sdchagin free(eep, M_EPOLL); 362283441Sdchagin return (error); 363283441Sdchagin} 364283441Sdchagin 365283441Sdchagin/* 366283441Sdchagin * Copyin callback used by kevent. This copies already 367283441Sdchagin * converted filters from kernel memory to the kevent 368283441Sdchagin * internal kernel memory. Hence the memcpy instead of 369283441Sdchagin * copyin. 370283441Sdchagin */ 371283441Sdchaginstatic int 372283441Sdchaginepoll_kev_copyin(void *arg, struct kevent *kevp, int count) 373283441Sdchagin{ 374283441Sdchagin struct epoll_copyin_args *args; 375283441Sdchagin 376283441Sdchagin args = (struct epoll_copyin_args*) arg; 377283441Sdchagin 378283441Sdchagin memcpy(kevp, args->changelist, count * sizeof(*kevp)); 379283441Sdchagin args->changelist += count; 380283441Sdchagin 381283441Sdchagin return (0); 382283441Sdchagin} 383283441Sdchagin 384283441Sdchagin/* 385283441Sdchagin * Load epoll filter, convert it to kevent filter 386283441Sdchagin * and load it into kevent subsystem. 387283441Sdchagin */ 388283441Sdchaginint 389283441Sdchaginlinux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 390283441Sdchagin{ 391283441Sdchagin struct file *epfp, *fp; 392283441Sdchagin struct epoll_copyin_args ciargs; 393283441Sdchagin struct kevent kev[2]; 394283441Sdchagin struct kevent_copyops k_ops = { &ciargs, 395283441Sdchagin NULL, 396283441Sdchagin epoll_kev_copyin}; 397283441Sdchagin struct epoll_event le; 398283441Sdchagin cap_rights_t rights; 399283441Sdchagin int kev_flags; 400283441Sdchagin int nchanges = 0; 401283441Sdchagin int error; 402283441Sdchagin 403283441Sdchagin if (args->op != LINUX_EPOLL_CTL_DEL) { 404283441Sdchagin error = copyin(args->event, &le, sizeof(le)); 405283441Sdchagin if (error != 0) 406283441Sdchagin return (error); 407283441Sdchagin } 408283441Sdchagin 409283441Sdchagin error = fget(td, args->epfd, 410283441Sdchagin cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp); 411283441Sdchagin if (error != 0) 412283441Sdchagin return (error); 413283441Sdchagin if (epfp->f_type != DTYPE_KQUEUE) 414283441Sdchagin goto leave1; 415283441Sdchagin 416283441Sdchagin /* Protect user data vector from incorrectly supplied fd. */ 417283441Sdchagin error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp); 418283441Sdchagin if (error != 0) 419283441Sdchagin goto leave1; 420283441Sdchagin 421283441Sdchagin /* Linux disallows spying on himself */ 422283441Sdchagin if (epfp == fp) { 423283441Sdchagin error = EINVAL; 424283441Sdchagin goto leave0; 425283441Sdchagin } 426283441Sdchagin 427283441Sdchagin ciargs.changelist = kev; 428283441Sdchagin 429283441Sdchagin switch (args->op) { 430283441Sdchagin case LINUX_EPOLL_CTL_MOD: 431283441Sdchagin /* 432283441Sdchagin * We don't memorize which events were set for this FD 433283441Sdchagin * on this level, so just delete all we could have set: 434283441Sdchagin * EVFILT_READ and EVFILT_WRITE, ignoring any errors 435283441Sdchagin */ 436283441Sdchagin error = epoll_delete_all_events(td, epfp, args->fd); 437283441Sdchagin if (error) 438283441Sdchagin goto leave0; 439283441Sdchagin /* FALLTHROUGH */ 440283441Sdchagin 441283441Sdchagin case LINUX_EPOLL_CTL_ADD: 442283441Sdchagin kev_flags = EV_ADD | EV_ENABLE; 443283441Sdchagin break; 444283441Sdchagin 445283441Sdchagin case LINUX_EPOLL_CTL_DEL: 446283441Sdchagin /* CTL_DEL means unregister this fd with this epoll */ 447283441Sdchagin error = epoll_delete_all_events(td, epfp, args->fd); 448283441Sdchagin goto leave0; 449283441Sdchagin 450283441Sdchagin default: 451283441Sdchagin error = EINVAL; 452283441Sdchagin goto leave0; 453283441Sdchagin } 454283441Sdchagin 455283441Sdchagin error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags, 456283441Sdchagin kev, &nchanges); 457283441Sdchagin if (error) 458283441Sdchagin goto leave0; 459283441Sdchagin 460283441Sdchagin epoll_fd_install(td, args->fd, le.data); 461283441Sdchagin 462283441Sdchagin error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 463283441Sdchagin 464283441Sdchaginleave0: 465283441Sdchagin fdrop(fp, td); 466283441Sdchagin 467283441Sdchaginleave1: 468283441Sdchagin fdrop(epfp, td); 469283441Sdchagin return (error); 470283441Sdchagin} 471283441Sdchagin 472283441Sdchagin/* 473283441Sdchagin * Wait for a filter to be triggered on the epoll file descriptor. 474283441Sdchagin */ 475293585Sdchaginstatic int 476293585Sdchaginlinux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 477293585Sdchagin int maxevents, int timeout, sigset_t *uset) 478283441Sdchagin{ 479283441Sdchagin struct file *epfp; 480283441Sdchagin struct timespec ts, *tsp; 481283441Sdchagin cap_rights_t rights; 482283441Sdchagin struct epoll_copyout_args coargs; 483283441Sdchagin struct kevent_copyops k_ops = { &coargs, 484283441Sdchagin epoll_kev_copyout, 485283441Sdchagin NULL}; 486283441Sdchagin int error; 487283441Sdchagin 488293585Sdchagin if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 489283441Sdchagin return (EINVAL); 490283441Sdchagin 491293585Sdchagin if (uset != NULL) { 492293585Sdchagin error = kern_sigprocmask(td, SIG_SETMASK, uset, 493293585Sdchagin &td->td_oldsigmask, 0); 494293585Sdchagin if (error != 0) 495293585Sdchagin return (error); 496293585Sdchagin td->td_pflags |= TDP_OLDMASK; 497293585Sdchagin /* 498293585Sdchagin * Make sure that ast() is called on return to 499293585Sdchagin * usermode and TDP_OLDMASK is cleared, restoring old 500293585Sdchagin * sigmask. 501293585Sdchagin */ 502293585Sdchagin thread_lock(td); 503293585Sdchagin td->td_flags |= TDF_ASTPENDING; 504293585Sdchagin thread_unlock(td); 505293585Sdchagin } 506293585Sdchagin 507293585Sdchagin error = fget(td, epfd, 508283441Sdchagin cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp); 509283441Sdchagin if (error != 0) 510283441Sdchagin return (error); 511283441Sdchagin 512293585Sdchagin coargs.leventlist = events; 513283441Sdchagin coargs.p = td->td_proc; 514283441Sdchagin coargs.count = 0; 515283441Sdchagin coargs.error = 0; 516283441Sdchagin 517293585Sdchagin if (timeout != -1) { 518293585Sdchagin if (timeout < 0) { 519283441Sdchagin error = EINVAL; 520283441Sdchagin goto leave; 521283441Sdchagin } 522283441Sdchagin /* Convert from milliseconds to timespec. */ 523293585Sdchagin ts.tv_sec = timeout / 1000; 524293585Sdchagin ts.tv_nsec = (timeout % 1000) * 1000000; 525283441Sdchagin tsp = &ts; 526283441Sdchagin } else { 527283441Sdchagin tsp = NULL; 528283441Sdchagin } 529283441Sdchagin 530293585Sdchagin error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 531283441Sdchagin if (error == 0 && coargs.error != 0) 532283441Sdchagin error = coargs.error; 533283441Sdchagin 534283441Sdchagin /* 535283441Sdchagin * kern_kevent might return ENOMEM which is not expected from epoll_wait. 536283441Sdchagin * Maybe we should translate that but I don't think it matters at all. 537283441Sdchagin */ 538283441Sdchagin if (error == 0) 539283441Sdchagin td->td_retval[0] = coargs.count; 540283441Sdchaginleave: 541283441Sdchagin fdrop(epfp, td); 542283441Sdchagin return (error); 543283441Sdchagin} 544283441Sdchagin 545293585Sdchaginint 546293585Sdchaginlinux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 547293585Sdchagin{ 548293585Sdchagin 549293585Sdchagin return (linux_epoll_wait_common(td, args->epfd, args->events, 550293585Sdchagin args->maxevents, args->timeout, NULL)); 551293585Sdchagin} 552293585Sdchagin 553293585Sdchaginint 554293585Sdchaginlinux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 555293585Sdchagin{ 556293585Sdchagin sigset_t mask, *pmask; 557293585Sdchagin l_sigset_t lmask; 558293585Sdchagin int error; 559293585Sdchagin 560293585Sdchagin if (args->mask != NULL) { 561293585Sdchagin error = copyin(args->mask, &lmask, sizeof(l_sigset_t)); 562293585Sdchagin if (error != 0) 563293585Sdchagin return (error); 564293585Sdchagin linux_to_bsd_sigset(&lmask, &mask); 565293585Sdchagin pmask = &mask; 566293585Sdchagin } else 567293585Sdchagin pmask = NULL; 568293585Sdchagin return (linux_epoll_wait_common(td, args->epfd, args->events, 569293585Sdchagin args->maxevents, args->timeout, pmask)); 570293585Sdchagin} 571293585Sdchagin 572283441Sdchaginstatic int 573283441Sdchaginepoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) 574283441Sdchagin{ 575283441Sdchagin struct epoll_copyin_args ciargs; 576283441Sdchagin struct kevent kev; 577283441Sdchagin struct kevent_copyops k_ops = { &ciargs, 578283441Sdchagin NULL, 579283441Sdchagin epoll_kev_copyin}; 580283441Sdchagin int error; 581283441Sdchagin 582283441Sdchagin ciargs.changelist = &kev; 583283441Sdchagin EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); 584283441Sdchagin 585283441Sdchagin error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL); 586283441Sdchagin 587283441Sdchagin /* 588283441Sdchagin * here we ignore ENONT, because we don't keep track of events here 589283441Sdchagin */ 590283441Sdchagin if (error == ENOENT) 591283441Sdchagin error = 0; 592283441Sdchagin return (error); 593283441Sdchagin} 594283441Sdchagin 595283441Sdchaginstatic int 596283441Sdchaginepoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 597283441Sdchagin{ 598283441Sdchagin int error1, error2; 599283441Sdchagin 600283441Sdchagin error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ); 601283441Sdchagin error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE); 602283441Sdchagin 603283441Sdchagin /* report any errors we got */ 604283441Sdchagin return (error1 == 0 ? error2 : error1); 605283441Sdchagin} 606293549Sdchagin 607293549Sdchaginstatic int 608293549Sdchagineventfd_create(struct thread *td, uint32_t initval, int flags) 609293549Sdchagin{ 610293549Sdchagin struct filedesc *fdp; 611293549Sdchagin struct eventfd *efd; 612293549Sdchagin struct file *fp; 613293549Sdchagin int fflags, fd, error; 614293549Sdchagin 615293549Sdchagin fflags = 0; 616293549Sdchagin if ((flags & LINUX_O_CLOEXEC) != 0) 617293549Sdchagin fflags |= O_CLOEXEC; 618293549Sdchagin 619293549Sdchagin fdp = td->td_proc->p_fd; 620293549Sdchagin error = falloc(td, &fp, &fd, fflags); 621293549Sdchagin if (error) 622293549Sdchagin return (error); 623293549Sdchagin 624293549Sdchagin efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); 625293549Sdchagin efd->efd_flags = flags; 626293549Sdchagin efd->efd_count = initval; 627293549Sdchagin mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 628293549Sdchagin 629293549Sdchagin knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 630293549Sdchagin 631293549Sdchagin fflags = FREAD | FWRITE; 632293549Sdchagin if ((flags & LINUX_O_NONBLOCK) != 0) 633293549Sdchagin fflags |= FNONBLOCK; 634293549Sdchagin 635293549Sdchagin finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); 636293549Sdchagin fdrop(fp, td); 637293549Sdchagin 638293549Sdchagin td->td_retval[0] = fd; 639293549Sdchagin return (error); 640293549Sdchagin} 641293549Sdchagin 642293549Sdchaginint 643293549Sdchaginlinux_eventfd(struct thread *td, struct linux_eventfd_args *args) 644293549Sdchagin{ 645293549Sdchagin 646293549Sdchagin return (eventfd_create(td, args->initval, 0)); 647293549Sdchagin} 648293549Sdchagin 649293549Sdchaginint 650293549Sdchaginlinux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 651293549Sdchagin{ 652293549Sdchagin 653293549Sdchagin if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) 654293549Sdchagin return (EINVAL); 655293549Sdchagin 656293549Sdchagin return (eventfd_create(td, args->initval, args->flags)); 657293549Sdchagin} 658293549Sdchagin 659293549Sdchaginstatic int 660293549Sdchagineventfd_close(struct file *fp, struct thread *td) 661293549Sdchagin{ 662293549Sdchagin struct eventfd *efd; 663293549Sdchagin 664293549Sdchagin efd = fp->f_data; 665293549Sdchagin if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 666293549Sdchagin return (EBADF); 667293549Sdchagin 668293549Sdchagin seldrain(&efd->efd_sel); 669293549Sdchagin knlist_destroy(&efd->efd_sel.si_note); 670293549Sdchagin 671293549Sdchagin fp->f_ops = &badfileops; 672293549Sdchagin mtx_destroy(&efd->efd_lock); 673293549Sdchagin free(efd, M_EPOLL); 674293549Sdchagin 675293549Sdchagin return (0); 676293549Sdchagin} 677293549Sdchagin 678293549Sdchaginstatic int 679293549Sdchagineventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 680293549Sdchagin int flags, struct thread *td) 681293549Sdchagin{ 682293549Sdchagin struct eventfd *efd; 683293549Sdchagin eventfd_t count; 684293549Sdchagin int error; 685293549Sdchagin 686293549Sdchagin efd = fp->f_data; 687293549Sdchagin if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 688293549Sdchagin return (EBADF); 689293549Sdchagin 690293549Sdchagin if (uio->uio_resid < sizeof(eventfd_t)) 691293549Sdchagin return (EINVAL); 692293549Sdchagin 693293549Sdchagin error = 0; 694293549Sdchagin mtx_lock(&efd->efd_lock); 695293549Sdchaginretry: 696293549Sdchagin if (efd->efd_count == 0) { 697293549Sdchagin if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 698293549Sdchagin mtx_unlock(&efd->efd_lock); 699293549Sdchagin return (EAGAIN); 700293549Sdchagin } 701293549Sdchagin error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); 702293549Sdchagin if (error == 0) 703293549Sdchagin goto retry; 704293549Sdchagin } 705293549Sdchagin if (error == 0) { 706293549Sdchagin if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { 707293549Sdchagin count = 1; 708293549Sdchagin --efd->efd_count; 709293549Sdchagin } else { 710293549Sdchagin count = efd->efd_count; 711293549Sdchagin efd->efd_count = 0; 712293549Sdchagin } 713293549Sdchagin KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 714293549Sdchagin selwakeup(&efd->efd_sel); 715293549Sdchagin wakeup(&efd->efd_count); 716293549Sdchagin mtx_unlock(&efd->efd_lock); 717293549Sdchagin error = uiomove(&count, sizeof(eventfd_t), uio); 718293549Sdchagin } else 719293549Sdchagin mtx_unlock(&efd->efd_lock); 720293549Sdchagin 721293549Sdchagin return (error); 722293549Sdchagin} 723293549Sdchagin 724293549Sdchaginstatic int 725293549Sdchagineventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 726293549Sdchagin int flags, struct thread *td) 727293549Sdchagin{ 728293549Sdchagin struct eventfd *efd; 729293549Sdchagin eventfd_t count; 730293549Sdchagin int error; 731293549Sdchagin 732293549Sdchagin efd = fp->f_data; 733293549Sdchagin if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 734293549Sdchagin return (EBADF); 735293549Sdchagin 736293549Sdchagin if (uio->uio_resid < sizeof(eventfd_t)) 737293549Sdchagin return (EINVAL); 738293549Sdchagin 739293549Sdchagin error = uiomove(&count, sizeof(eventfd_t), uio); 740293549Sdchagin if (error) 741293549Sdchagin return (error); 742293549Sdchagin if (count == UINT64_MAX) 743293549Sdchagin return (EINVAL); 744293549Sdchagin 745293549Sdchagin mtx_lock(&efd->efd_lock); 746293549Sdchaginretry: 747293549Sdchagin if (UINT64_MAX - efd->efd_count <= count) { 748293549Sdchagin if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 749293549Sdchagin mtx_unlock(&efd->efd_lock); 750293549Sdchagin return (EAGAIN); 751293549Sdchagin } 752293549Sdchagin error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 753293549Sdchagin PCATCH, "lefdwr", 0); 754293549Sdchagin if (error == 0) 755293549Sdchagin goto retry; 756293549Sdchagin } 757293549Sdchagin if (error == 0) { 758293549Sdchagin efd->efd_count += count; 759293549Sdchagin KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 760293549Sdchagin selwakeup(&efd->efd_sel); 761293549Sdchagin wakeup(&efd->efd_count); 762293549Sdchagin } 763293549Sdchagin mtx_unlock(&efd->efd_lock); 764293549Sdchagin 765293549Sdchagin return (error); 766293549Sdchagin} 767293549Sdchagin 768293549Sdchaginstatic int 769293549Sdchagineventfd_poll(struct file *fp, int events, struct ucred *active_cred, 770293549Sdchagin struct thread *td) 771293549Sdchagin{ 772293549Sdchagin struct eventfd *efd; 773293549Sdchagin int revents = 0; 774293549Sdchagin 775293549Sdchagin efd = fp->f_data; 776293549Sdchagin if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 777293549Sdchagin return (POLLERR); 778293549Sdchagin 779293549Sdchagin mtx_lock(&efd->efd_lock); 780293549Sdchagin if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) 781293549Sdchagin revents |= events & (POLLIN|POLLRDNORM); 782293549Sdchagin if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) 783293549Sdchagin revents |= events & (POLLOUT|POLLWRNORM); 784293549Sdchagin if (revents == 0) 785293549Sdchagin selrecord(td, &efd->efd_sel); 786293549Sdchagin mtx_unlock(&efd->efd_lock); 787293549Sdchagin 788293549Sdchagin return (revents); 789293549Sdchagin} 790293549Sdchagin 791293549Sdchagin/*ARGSUSED*/ 792293549Sdchaginstatic int 793293549Sdchagineventfd_kqfilter(struct file *fp, struct knote *kn) 794293549Sdchagin{ 795293549Sdchagin struct eventfd *efd; 796293549Sdchagin 797293549Sdchagin efd = fp->f_data; 798293549Sdchagin if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 799293549Sdchagin return (EINVAL); 800293549Sdchagin 801293549Sdchagin mtx_lock(&efd->efd_lock); 802293549Sdchagin switch (kn->kn_filter) { 803293549Sdchagin case EVFILT_READ: 804293549Sdchagin kn->kn_fop = &eventfd_rfiltops; 805293549Sdchagin break; 806293549Sdchagin case EVFILT_WRITE: 807293549Sdchagin kn->kn_fop = &eventfd_wfiltops; 808293549Sdchagin break; 809293549Sdchagin default: 810293549Sdchagin mtx_unlock(&efd->efd_lock); 811293549Sdchagin return (EINVAL); 812293549Sdchagin } 813293549Sdchagin 814293549Sdchagin kn->kn_hook = efd; 815293549Sdchagin knlist_add(&efd->efd_sel.si_note, kn, 1); 816293549Sdchagin mtx_unlock(&efd->efd_lock); 817293549Sdchagin 818293549Sdchagin return (0); 819293549Sdchagin} 820293549Sdchagin 821293549Sdchaginstatic void 822293549Sdchaginfilt_eventfddetach(struct knote *kn) 823293549Sdchagin{ 824293549Sdchagin struct eventfd *efd = kn->kn_hook; 825293549Sdchagin 826293549Sdchagin mtx_lock(&efd->efd_lock); 827293549Sdchagin knlist_remove(&efd->efd_sel.si_note, kn, 1); 828293549Sdchagin mtx_unlock(&efd->efd_lock); 829293549Sdchagin} 830293549Sdchagin 831293549Sdchagin/*ARGSUSED*/ 832293549Sdchaginstatic int 833293549Sdchaginfilt_eventfdread(struct knote *kn, long hint) 834293549Sdchagin{ 835293549Sdchagin struct eventfd *efd = kn->kn_hook; 836293549Sdchagin int ret; 837293549Sdchagin 838293549Sdchagin mtx_assert(&efd->efd_lock, MA_OWNED); 839293549Sdchagin ret = (efd->efd_count > 0); 840293549Sdchagin 841293549Sdchagin return (ret); 842293549Sdchagin} 843293549Sdchagin 844293549Sdchagin/*ARGSUSED*/ 845293549Sdchaginstatic int 846293549Sdchaginfilt_eventfdwrite(struct knote *kn, long hint) 847293549Sdchagin{ 848293549Sdchagin struct eventfd *efd = kn->kn_hook; 849293549Sdchagin int ret; 850293549Sdchagin 851293549Sdchagin mtx_assert(&efd->efd_lock, MA_OWNED); 852293549Sdchagin ret = (UINT64_MAX - 1 > efd->efd_count); 853293549Sdchagin 854293549Sdchagin return (ret); 855293549Sdchagin} 856293549Sdchagin 857293549Sdchagin/*ARGSUSED*/ 858293549Sdchaginstatic int 859293549Sdchagineventfd_truncate(struct file *fp, off_t length, struct ucred *active_cred, 860293549Sdchagin struct thread *td) 861293549Sdchagin{ 862293549Sdchagin 863293549Sdchagin return (ENXIO); 864293549Sdchagin} 865293549Sdchagin 866293549Sdchagin/*ARGSUSED*/ 867293549Sdchaginstatic int 868293549Sdchagineventfd_ioctl(struct file *fp, u_long cmd, void *data, 869293549Sdchagin struct ucred *active_cred, struct thread *td) 870293549Sdchagin{ 871293549Sdchagin 872293549Sdchagin return (ENXIO); 873293549Sdchagin} 874293549Sdchagin 875293549Sdchagin/*ARGSUSED*/ 876293549Sdchaginstatic int 877293549Sdchagineventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, 878293549Sdchagin struct thread *td) 879293549Sdchagin{ 880293549Sdchagin 881293549Sdchagin return (ENXIO); 882293549Sdchagin} 883