1// Copyright 2016 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <assert.h>
6#include <dirent.h>
7#include <errno.h>
8#include <fcntl.h>
9#include <limits.h>
10#include <poll.h>
11#include <stdarg.h>
12#include <stdbool.h>
13#include <stdlib.h>
14#include <string.h>
15#include <sys/ioctl.h>
16#include <sys/mman.h>
17#include <sys/select.h>
18#include <sys/stat.h>
19#include <sys/statfs.h>
20#include <sys/uio.h>
21#include <utime.h>
22#include <threads.h>
23#include <unistd.h>
24
25#include <fuchsia/io/c/fidl.h>
26#include <zircon/assert.h>
27#include <zircon/compiler.h>
28#include <zircon/device/vfs.h>
29#include <zircon/process.h>
30#include <zircon/processargs.h>
31#include <zircon/syscalls.h>
32#include <zircon/time.h>
33
35#include <lib/fdio/debug.h>
36#include <lib/fdio/io.h>
37#include <lib/fdio/namespace.h>
38#include <lib/fdio/private.h>
39#include <lib/fdio/remoteio.h>
40#include <lib/fdio/util.h>
41#include <lib/fdio/vfs.h>
42#include <lib/fdio/socket.h>
43
44#include "private.h"
45#include "unistd.h"
46
47static_assert(IOFLAG_CLOEXEC == FD_CLOEXEC, "Unexpected fdio flags value");
48
49// non-thread-safe emulation of unistd io functions
50// using the fdio transports
51
52fdio_state_t __fdio_global_state = {
53    .lock = MTX_INIT,
54    .cwd_lock = MTX_INIT,
55    .init = true,
56    .cwd_path = "/",
57};
58
59// Attaches an fdio to an fdtab slot.
60// The fdio must have been upref'd on behalf of the
61// fdtab prior to binding.
62__EXPORT
63int fdio_bind_to_fd(fdio_t* io, int fd, int starting_fd) {
64    fdio_t* io_to_close = NULL;
65
66    mtx_lock(&fdio_lock);
67    LOG(1, "fdio: bind_to_fd(%p, %d, %d)\n", io, fd, starting_fd);
68    if (fd < 0) {
69        // A negative fd implies that any free fd value can be used
70        //TODO: bitmap, ffs, etc
71        for (fd = starting_fd; fd < FDIO_MAX_FD; fd++) {
72            if (fdio_fdtab[fd] == NULL) {
73                goto free_fd_found;
74            }
75        }
76        errno = EMFILE;
77        mtx_unlock(&fdio_lock);
78        return -1;
79    } else if (fd >= FDIO_MAX_FD) {
80        errno = EINVAL;
81        mtx_unlock(&fdio_lock);
82        return -1;
83    } else {
84        io_to_close = fdio_fdtab[fd];
85        if (io_to_close) {
86            io_to_close->dupcount--;
87            LOG(1, "fdio: bind_to_fd: closed fd=%d, io=%p, dupcount=%d\n",
88                fd, io_to_close, io_to_close->dupcount);
89            if (io_to_close->dupcount > 0) {
90                // still alive in another fdtab slot
91                fdio_release(io_to_close);
92                io_to_close = NULL;
93            }
94        }
95    }
96
97free_fd_found:
98    LOG(1, "fdio: bind_to_fd() OK fd=%d\n", fd);
99    io->dupcount++;
100    fdio_fdtab[fd] = io;
101    mtx_unlock(&fdio_lock);
102
103    if (io_to_close) {
104        io_to_close->ops->close(io_to_close);
105        fdio_release(io_to_close);
106    }
107    return fd;
108}
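
// Illustrative usage sketch (not part of the public API surface): binding a
// freshly created fdio_t into the lowest free fd. fdio_bind_to_fd() consumes
// the caller's reference on success, so it is only cleaned up here on failure,
// mirroring the pattern used by vopenat() and pipe2() below.
//
//   fdio_t* io = fdio_null_create();
//   int fd = fdio_bind_to_fd(io, -1, 0);
//   if (fd < 0) {
//       // Binding failed (e.g. the fd table is full); we still own the
//       // reference, so close and release it ourselves.
//       fdio_close(io);
//       fdio_release(io);
//   }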
109
// If an fdio_t exists for this fd, has not been dup'd, and is not in
// active use (an io operation underway, etc), detach it from the fdtab
// and return it with a single refcount.
114__EXPORT
115zx_status_t fdio_unbind_from_fd(int fd, fdio_t** out) {
116    zx_status_t status;
117    mtx_lock(&fdio_lock);
118    LOG(1, "fdio: unbind_from_fd(%d)\n", fd);
119    if (fd >= FDIO_MAX_FD) {
120        status = ZX_ERR_INVALID_ARGS;
121        goto done;
122    }
123    fdio_t* io = fdio_fdtab[fd];
124    if (io == NULL) {
125        status = ZX_ERR_INVALID_ARGS;
126        goto done;
127    }
128    if (io->dupcount > 1) {
129        status = ZX_ERR_UNAVAILABLE;
130        goto done;
131    }
132    if (atomic_load(&io->refcount) > 1) {
133        status = ZX_ERR_UNAVAILABLE;
134        goto done;
135    }
136    io->dupcount = 0;
137    fdio_fdtab[fd] = NULL;
138    *out = io;
139    status = ZX_OK;
140done:
141    mtx_unlock(&fdio_lock);
142    return status;
143}
144
145__EXPORT
146fdio_t* __fdio_fd_to_io(int fd) {
147    if ((fd < 0) || (fd >= FDIO_MAX_FD)) {
148        return NULL;
149    }
150    fdio_t* io = NULL;
151    mtx_lock(&fdio_lock);
152    if ((io = fdio_fdtab[fd]) != NULL) {
153        fdio_acquire(io);
154    }
155    mtx_unlock(&fdio_lock);
156    return io;
157}
158
159zx_status_t fdio_close(fdio_t* io) {
160    if (io->dupcount > 0) {
161        LOG(1, "fdio: close(%p): nonzero dupcount!\n", io);
162    }
163    LOG(1, "fdio: io: close(%p)\n", io);
164    return io->ops->close(io);
165}
166
// Verify that the O_* flags which map 1:1 onto ZX_FS_* flags have matching values.
168static_assert(O_PATH == ZX_FS_FLAG_VNODE_REF_ONLY, "Open Flag mismatch");
169static_assert(O_ADMIN == ZX_FS_RIGHT_ADMIN, "Open Flag mismatch");
170static_assert(O_CREAT == ZX_FS_FLAG_CREATE, "Open Flag mismatch");
171static_assert(O_EXCL == ZX_FS_FLAG_EXCLUSIVE, "Open Flag mismatch");
172static_assert(O_TRUNC == ZX_FS_FLAG_TRUNCATE, "Open Flag mismatch");
173static_assert(O_DIRECTORY == ZX_FS_FLAG_DIRECTORY, "Open Flag mismatch");
174static_assert(O_APPEND == ZX_FS_FLAG_APPEND, "Open Flag mismatch");
175static_assert(O_NOREMOTE == ZX_FS_FLAG_NOREMOTE, "Open Flag mismatch");
176
177// The mask of "1:1" flags which match between both open flag representations.
178#define ZXIO_FS_MASK (O_PATH | O_ADMIN | O_CREAT | O_EXCL | O_TRUNC | \
179                      O_DIRECTORY | O_APPEND | O_NOREMOTE)
180
181// Verify that the remaining O_* flags don't overlap with the ZXIO mask.
182static_assert(!(O_RDONLY & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
183static_assert(!(O_WRONLY & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
184static_assert(!(O_RDWR & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
185static_assert(!(O_NONBLOCK & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
186static_assert(!(O_DSYNC & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
187static_assert(!(O_SYNC & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
188static_assert(!(O_RSYNC & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
189static_assert(!(O_NOFOLLOW & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
190static_assert(!(O_CLOEXEC & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
191static_assert(!(O_NOCTTY & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
192static_assert(!(O_ASYNC & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
193static_assert(!(O_DIRECT & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
194static_assert(!(O_LARGEFILE & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
195static_assert(!(O_NOATIME & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
196static_assert(!(O_TMPFILE & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
197static_assert(!(O_PIPELINE & ZXIO_FS_MASK), "Unexpected collision with ZXIO_FS_MASK");
198
199static uint32_t fdio_flags_to_zxio(uint32_t flags) {
200    uint32_t result = 0;
201    switch (flags & O_ACCMODE) {
202    case O_RDONLY:
203        result |= ZX_FS_RIGHT_READABLE;
204        break;
205    case O_WRONLY:
206        result |= ZX_FS_RIGHT_WRITABLE;
207        break;
208    case O_RDWR:
209        result |= ZX_FS_RIGHT_READABLE | ZX_FS_RIGHT_WRITABLE;
210        break;
211    }
212
213    if (!(flags & O_PIPELINE)) {
214        result |= ZX_FS_FLAG_DESCRIBE;
215    }
216
217    result |= (flags & ZXIO_FS_MASK);
218    return result;
219}
220
221static uint32_t zxio_flags_to_fdio(uint32_t flags) {
222    uint32_t result = 0;
223    if ((flags & (ZX_FS_RIGHT_READABLE | ZX_FS_RIGHT_WRITABLE)) ==
224        (ZX_FS_RIGHT_READABLE | ZX_FS_RIGHT_WRITABLE)) {
225        result |= O_RDWR;
226    } else if (flags & ZX_FS_RIGHT_WRITABLE) {
227        result |= O_WRONLY;
228    } else {
229        result |= O_RDONLY;
230    }
231
232    result |= (flags & ZXIO_FS_MASK);
233    return result;
234}
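
// Worked example of the mapping above (values shown symbolically, since the
// 1:1 constants are asserted to be numerically identical):
//
//   fdio_flags_to_zxio(O_RDWR | O_CREAT)
//       == ZX_FS_RIGHT_READABLE | ZX_FS_RIGHT_WRITABLE
//        | ZX_FS_FLAG_CREATE | ZX_FS_FLAG_DESCRIBE
//
// ZX_FS_FLAG_DESCRIBE is added because O_PIPELINE was not requested. Assuming
// ZX_FS_FLAG_DESCRIBE does not alias one of the 1:1 bits, the round trip
// through zxio_flags_to_fdio() recovers O_RDWR | O_CREAT.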
235
236
237// Possibly return an owned fdio_t corresponding to either the root,
238// the cwd, or, for the ...at variants, dirfd. In the absolute path
239// case, *path is also adjusted.
240static fdio_t* fdio_iodir(const char** path, int dirfd) {
241    fdio_t* iodir = NULL;
242    mtx_lock(&fdio_lock);
243    if (*path[0] == '/') {
244        iodir = fdio_root_handle;
245        // Since we are sending a request to the root handle, the
246        // rest of the path should be canonicalized as a relative
247        // path (relative to this root handle).
248        while (*path[0] == '/') {
249            (*path)++;
250            if (*path[0] == 0) {
251                *path = ".";
252            }
253        }
254    } else if (dirfd == AT_FDCWD) {
255        iodir = fdio_cwd_handle;
256    } else if ((dirfd >= 0) && (dirfd < FDIO_MAX_FD)) {
257        iodir = fdio_fdtab[dirfd];
258    }
259    if (iodir != NULL) {
260        fdio_acquire(iodir);
261    }
262    mtx_unlock(&fdio_lock);
263    return iodir;
264}
265
266#define IS_SEPARATOR(c) ((c) == '/' || (c) == 0)
267
268// Checks that if we increment this index forward, we'll
269// still have enough space for a null terminator within
270// PATH_MAX bytes.
271#define CHECK_CAN_INCREMENT(i)           \
272    if (unlikely((i) + 1 >= PATH_MAX)) { \
273        return ZX_ERR_BAD_PATH;          \
274    }
275
276// Cleans an input path, transforming it to out, according to the
277// rules defined by "Lexical File Names in Plan 9 or Getting Dot-Dot Right",
278// accessible at: https://9p.io/sys/doc/lexnames.html
279//
280// Code heavily inspired by Go's filepath.Clean function, from:
281// https://golang.org/src/path/filepath/path.go
282//
// out is expected to be PATH_MAX bytes long.
// Sets is_dir to 'true' if the cleaned path must refer to a directory
// (e.g. it ends in a separator, or is "." or ".."), and 'false' otherwise.
285__EXPORT
286zx_status_t __fdio_cleanpath(const char* in, char* out, size_t* outlen, bool* is_dir) {
287    if (in[0] == 0) {
288        strcpy(out, ".");
289        *outlen = 1;
290        *is_dir = true;
291        return ZX_OK;
292    }
293
294    bool rooted = (in[0] == '/');
295    size_t in_index = 0; // Index of the next byte to read
296    size_t out_index = 0; // Index of the next byte to write
297
298    if (rooted) {
299        out[out_index++] = '/';
300        in_index++;
301        *is_dir = true;
302    }
303    size_t dotdot = out_index; // The output index at which '..' cannot be cleaned further.
304
305    while (in[in_index] != 0) {
306        *is_dir = true;
307        if (in[in_index] == '/') {
308            // 1. Reduce multiple slashes to a single slash
309            CHECK_CAN_INCREMENT(in_index);
310            in_index++;
311        } else if (in[in_index] == '.' && IS_SEPARATOR(in[in_index + 1])) {
312            // 2. Eliminate . path name elements (the current directory)
313            CHECK_CAN_INCREMENT(in_index);
314            in_index++;
315        } else if (in[in_index] == '.' && in[in_index + 1] == '.' &&
316                   IS_SEPARATOR(in[in_index + 2])) {
317            CHECK_CAN_INCREMENT(in_index + 1);
318            in_index += 2;
319            if (out_index > dotdot) {
320                // 3. Eliminate .. path elements (the parent directory) and the element that
321                // precedes them.
322                out_index--;
323                while (out_index > dotdot && out[out_index] != '/') { out_index--; }
324            } else if (rooted) {
325                // 4. Eliminate .. elements that begin a rooted path, that is, replace /.. by / at
326                // the beginning of a path.
327                continue;
328            } else if (!rooted) {
329                if (out_index > 0) {
330                    out[out_index++] = '/';
331                }
332                // 5. Leave intact .. elements that begin a non-rooted path.
333                out[out_index++] = '.';
334                out[out_index++] = '.';
335                dotdot = out_index;
336            }
337        } else {
338            *is_dir = false;
339            if ((rooted && out_index != 1) || (!rooted && out_index != 0)) {
340                // Add '/' before normal path component, for non-root components.
341                out[out_index++] = '/';
342            }
343
344            while (!IS_SEPARATOR(in[in_index])) {
345                CHECK_CAN_INCREMENT(in_index);
346                out[out_index++] = in[in_index++];
347            }
348        }
349    }
350
351    if (out_index == 0) {
352        strcpy(out, ".");
353        *outlen = 1;
354        *is_dir = true;
355        return ZX_OK;
356    }
357
    // Append the null terminator (not counted in outlen).
359    *outlen = out_index;
360    out[out_index++] = 0;
361    return ZX_OK;
362}
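
// A few illustrative inputs and the outputs __fdio_cleanpath() produces for
// them, derived from the rules above:
//
//   "/foo//bar/../baz/"  ->  "/foo/baz"   (outlen 8, is_dir true)
//   "a/../../b"          ->  "../b"       (outlen 4, is_dir false)
//   ""                   ->  "."          (outlen 1, is_dir true)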
363
364zx_status_t __fdio_open_at(fdio_t** io, int dirfd, const char* path, int flags, uint32_t mode) {
365    if (path == NULL) {
366        return ZX_ERR_INVALID_ARGS;
367    }
368    if (path[0] == 0) {
369        return ZX_ERR_NOT_FOUND;
370    }
371    fdio_t* iodir = fdio_iodir(&path, dirfd);
372    if (iodir == NULL) {
373        return ZX_ERR_BAD_HANDLE;
374    }
375
376    char clean[PATH_MAX];
377    size_t outlen;
378    bool is_dir;
379    zx_status_t status = __fdio_cleanpath(path, clean, &outlen, &is_dir);
    if (status != ZX_OK) {
        fdio_release(iodir);
        return status;
    }
383    flags |= (is_dir ? O_DIRECTORY : 0);
384
385    status = iodir->ops->open(iodir, clean, fdio_flags_to_zxio(flags), mode, io);
386    fdio_release(iodir);
387    return status;
388}
389
390zx_status_t __fdio_open(fdio_t** io, const char* path, int flags, uint32_t mode) {
391    return __fdio_open_at(io, AT_FDCWD, path, flags, mode);
392}
393
394static void update_cwd_path(const char* path) {
395    if (path[0] == '/') {
        // it's "absolute", but we'll still parse it as relative (from /)
        // so that we normalize the path (resolving '.', '..', '//', etc)
398        fdio_cwd_path[0] = '/';
399        fdio_cwd_path[1] = 0;
400        path++;
401    }
402
403    size_t seglen;
404    const char* next;
405    for (; path[0]; path = next) {
406        next = strchr(path, '/');
407        if (next == NULL) {
408            seglen = strlen(path);
409            next = path + seglen;
410        } else {
411            seglen = next - path;
412            next++;
413        }
414        if (seglen == 0) {
415            // empty segment, skip
416            continue;
417        }
418        if ((seglen == 1) && (path[0] == '.')) {
419            // no-change segment, skip
420            continue;
421        }
422        if ((seglen == 2) && (path[0] == '.') && (path[1] == '.')) {
423            // parent directory, remove the trailing path segment from cwd_path
424            char* x = strrchr(fdio_cwd_path, '/');
425            if (x == NULL) {
426                // shouldn't ever happen
427                goto wat;
428            }
429            // remove the current trailing path segment from cwd
430            if (x == fdio_cwd_path) {
431                // but never remove the first /
432                fdio_cwd_path[1] = 0;
433            } else {
434                x[0] = 0;
435            }
436            continue;
437        }
438        // regular path segment, append to cwd_path
439        size_t len = strlen(fdio_cwd_path);
440        if ((len + seglen + 2) >= PATH_MAX) {
441            // doesn't fit, shouldn't happen, but...
442            goto wat;
443        }
444        if (len != 1) {
445            // if len is 1, path is "/", so don't append a '/'
446            fdio_cwd_path[len++] = '/';
447        }
448        memcpy(fdio_cwd_path + len, path, seglen);
449        fdio_cwd_path[len + seglen] = 0;
450    }
451    return;
452
453wat:
454    strcpy(fdio_cwd_path, "(unknown)");
455    return;
456}
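
// For example, with fdio_cwd_path == "/data", update_cwd_path("../tmp//./x")
// leaves fdio_cwd_path as "/tmp/x": ".." removes the trailing segment (but
// never the leading '/'), empty and "." segments are skipped, and "tmp" and
// "x" are appended.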
457
458// Opens the directory containing path
459//
460// Returns the non-directory portion of the path in 'out', which
461// must be a buffer that can fit [NAME_MAX + 1] characters.
462static zx_status_t __fdio_opendir_containing_at(fdio_t** io, int dirfd, const char* path,
463                                                char* out) {
464    if (path == NULL) {
465        return ZX_ERR_INVALID_ARGS;
466    }
467
468    fdio_t* iodir = fdio_iodir(&path, dirfd);
469    if (iodir == NULL) {
470        return ZX_ERR_BAD_HANDLE;
471    }
472
473    char clean[PATH_MAX];
474    size_t pathlen;
475    bool is_dir;
476    zx_status_t status = __fdio_cleanpath(path, clean, &pathlen, &is_dir);
477    if (status != ZX_OK) {
478        fdio_release(iodir);
479        return status;
480    }
481
482    // Find the last '/'; copy everything after it.
483    size_t i = 0;
484    for (i = pathlen - 1; i > 0; i--) {
485        if (clean[i] == '/') {
486            clean[i] = 0;
487            i++;
488            break;
489        }
490    }
491
492    // clean[i] is now the start of the name
493    size_t namelen = pathlen - i;
494    if (namelen + (is_dir ? 1 : 0) > NAME_MAX) {
495        fdio_release(iodir);
496        return ZX_ERR_BAD_PATH;
497    }
498
499    // Copy the trailing 'name' to out.
500    memcpy(out, clean + i, namelen);
501    if (is_dir) {
502        // TODO(smklein): Propagate this information without using
503        // the output name; it'll simplify server-side path parsing
504        // if all trailing slashes are replaced with "O_DIRECTORY".
505        out[namelen++] = '/';
506    }
507    out[namelen] = 0;
508
509    if (i == 0 && clean[i] != '/') {
510        clean[0] = '.';
511        clean[1] = 0;
512    }
513
514    zx_status_t r = iodir->ops->open(iodir, clean,
515                                     fdio_flags_to_zxio(O_RDONLY | O_DIRECTORY), 0, io);
516    fdio_release(iodir);
517    return r;
518}
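
// Example of the split performed above: for the path "/foo/bar/", the
// directory "/foo" is opened and out becomes "bar/" (the trailing '/' is kept
// to preserve the directory intent; see the TODO above). For a bare name such
// as "baz", "." is opened relative to dirfd and out becomes "baz".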
519
520// 'name' must be a user-provided buffer, at least NAME_MAX + 1 bytes long.
521static zx_status_t __fdio_opendir_containing(fdio_t** io, const char* path, char* name) {
522    return __fdio_opendir_containing_at(io, AT_FDCWD, path, name);
523}
524
525// hook into libc process startup
526// this is called prior to main to set up the fdio world
527// and thus does not use the fdio_lock
528__EXPORT
529void __libc_extensions_init(uint32_t handle_count,
530                            zx_handle_t handle[],
531                            uint32_t handle_info[],
532                            uint32_t name_count,
533                            char** names) {
534
535#ifdef FDIO_LLDEBUG
536    const char* fdiodebug = getenv("FDIODEBUG");
537    if (fdiodebug) {
538        fdio_set_debug_level(strtoul(fdiodebug, NULL, 10));
539        LOG(1, "fdio: init: debuglevel = %s\n", fdiodebug);
540    } else {
541        LOG(1, "fdio: init()\n");
542    }
543#endif
544
545    int stdio_fd = -1;
546
547    // extract handles we care about
548    for (uint32_t n = 0; n < handle_count; n++) {
549        unsigned arg = PA_HND_ARG(handle_info[n]);
550        zx_handle_t h = handle[n];
551
552        // precalculate the fd from |arg|, for FDIO cases to use.
553        unsigned arg_fd = arg & (~FDIO_FLAG_USE_FOR_STDIO);
554
555        switch (PA_HND_TYPE(handle_info[n])) {
556        case PA_FDIO_REMOTE: {
557            // remote objects may have a second handle
558            // which is for signaling events
559            zx_handle_t event = ZX_HANDLE_INVALID;
560            if (((n + 1) < handle_count) &&
561                (handle_info[n] == handle_info[n + 1])) {
562                // TODO: Remove this case once all clients migrate to providing
563                // a single handle for PA_FDIO_REMOTE.
564                event = handle[n + 1];
565                handle_info[n + 1] = ZX_HANDLE_INVALID;
566            } else {
567                fuchsia_io_NodeInfo info;
568                memset(&info, 0, sizeof(info));
569                zx_status_t status = fuchsia_io_NodeDescribe(h, &info);
570                if (status != ZX_OK) {
571                    LOG(1, "fdio: Failed to describe fd=%d (rio) status=%d (%s)\n",
572                        arg_fd, status, zx_status_get_string(status));
573                    zx_handle_close(h);
574                    continue;
575                }
576
577                switch (info.tag) {
578                case fuchsia_io_NodeInfoTag_file:
579                    event = info.file.event;
580                    break;
581                case fuchsia_io_NodeInfoTag_device:
582                    event = info.device.event;
583                    break;
584                default:
585                    event = ZX_HANDLE_INVALID;
586                    break;
587                }
588            }
589
590            fdio_fdtab[arg_fd] = fdio_remote_create(h, event);
591            fdio_fdtab[arg_fd]->dupcount++;
592            LOG(1, "fdio: inherit fd=%d (rio)\n", arg_fd);
593            break;
594        }
595        case PA_FDIO_SOCKET: {
596            fdio_t* io = NULL;
597            zx_status_t status = fdio_acquire_socket(h, &io);
598            if (status != ZX_OK) {
599                LOG(1, "fdio: Failed to acquire for fd=%d (socket) status=%d (%s)\n",
600                    arg_fd, status, zx_status_get_string(status));
601                zx_handle_close(h);
602                continue;
603            }
604            fdio_fdtab[arg_fd] = io;
605            fdio_fdtab[arg_fd]->dupcount++;
606            LOG(1, "fdio: inherit fd=%d (socket)\n", arg_fd);
607            break;
608        }
609        case PA_FDIO_LOGGER:
610            fdio_fdtab[arg_fd] = fdio_logger_create(h);
611            fdio_fdtab[arg_fd]->dupcount++;
612            LOG(1, "fdio: inherit fd=%d (log)\n", arg_fd);
613            break;
614        case PA_NS_DIR:
            // we always continue here so that we don't steal the
            // handles from higher-level code that may also need
            // access to the namespace
618            if (arg >= name_count) {
619                continue;
620            }
621            if (fdio_root_ns == NULL) {
622                if (fdio_ns_create(&fdio_root_ns) < 0) {
623                    continue;
624                }
625            }
626            fdio_ns_bind(fdio_root_ns, names[arg], h);
627            continue;
628        default:
629            // unknown handle, leave it alone
630            continue;
631        }
632        handle[n] = 0;
633        handle_info[n] = 0;
634
635        // If we reach here then the handle is a PA_FDIO_* type (an fd), so
636        // check for a bit flag indicating that it should be duped into 0/1/2 to
637        // become all of stdin/out/err
638        if ((arg & FDIO_FLAG_USE_FOR_STDIO) && (arg_fd < FDIO_MAX_FD)) {
639          stdio_fd = arg_fd;
640        }
641    }
642
643    const char* cwd = getenv("PWD");
644    cwd = (cwd == NULL) ? "/" : cwd;
645
646    update_cwd_path(cwd);
647
648    fdio_t* use_for_stdio = (stdio_fd >= 0) ? fdio_fdtab[stdio_fd] : NULL;
649
650    // configure stdin/out/err if not init'd
651    for (uint32_t n = 0; n < 3; n++) {
652        if (fdio_fdtab[n] == NULL) {
653            if (use_for_stdio) {
654                fdio_acquire(use_for_stdio);
655                fdio_fdtab[n] = use_for_stdio;
656            } else {
657                fdio_fdtab[n] = fdio_null_create();
658            }
659            fdio_fdtab[n]->dupcount++;
660            LOG(1, "fdio: inherit fd=%u (dup of fd=%d)\n", n, stdio_fd);
661        }
662    }
663
664    if (fdio_root_ns) {
665        ZX_ASSERT(!fdio_root_handle);
666        fdio_root_handle = fdio_ns_open_root(fdio_root_ns);
667    }
668    if (fdio_root_handle) {
669        fdio_root_init = true;
670        __fdio_open(&fdio_cwd_handle, fdio_cwd_path, O_RDONLY | O_DIRECTORY, 0);
671    } else {
672        // placeholder null handle
673        fdio_root_handle = fdio_null_create();
674    }
675    if (fdio_cwd_handle == NULL) {
676        fdio_cwd_handle = fdio_null_create();
677    }
678}
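
// Illustration of the protocol consumed above (a sketch, not a complete
// launcher): a parent that wants the child to use a single remote object for
// all of stdin/out/err would pass the channel with a handle_info of
// PA_HND(PA_FDIO_REMOTE, 0 | FDIO_FLAG_USE_FOR_STDIO). The loop above installs
// it at fd 0, and the stdio fixup loop then duplicates it onto fds 1 and 2.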
679
680// Clean up during process teardown. This runs after atexit hooks in
681// libc. It continues to hold the fdio lock until process exit, to
682// prevent other threads from racing on file descriptors.
683__EXPORT
684void __libc_extensions_fini(void) __TA_ACQUIRE(&fdio_lock) {
685    mtx_lock(&fdio_lock);
686    for (int fd = 0; fd < FDIO_MAX_FD; fd++) {
687        fdio_t* io = fdio_fdtab[fd];
688        if (io) {
689            fdio_fdtab[fd] = NULL;
690            io->dupcount--;
691            if (io->dupcount == 0) {
692                io->ops->close(io);
693                fdio_release(io);
694            }
695        }
696    }
697}
698
699__EXPORT
700zx_status_t fdio_ns_install(fdio_ns_t* ns) {
701    fdio_t* io = fdio_ns_open_root(ns);
702    if (io == NULL) {
703        return ZX_ERR_IO;
704    }
705
706    fdio_t* old_root = NULL;
707    zx_status_t status;
708
709    mtx_lock(&fdio_lock);
710    if (fdio_root_ns != NULL) {
711        //TODO: support replacing an active namespace
712        status = ZX_ERR_ALREADY_EXISTS;
713    } else {
714        fdio_root_ns = ns;
715        if (fdio_root_handle) {
716            old_root = fdio_root_handle;
717        }
718        fdio_root_handle = io;
719        status = ZX_OK;
720    }
721    mtx_unlock(&fdio_lock);
722
723    if (old_root) {
724        fdio_close(old_root);
725        fdio_release(old_root);
726    }
727    return status;
728}
729
730__EXPORT
731zx_status_t fdio_ns_get_installed(fdio_ns_t** ns) {
732    zx_status_t status = ZX_OK;
733    mtx_lock(&fdio_lock);
734    if (fdio_root_ns == NULL) {
735        status = ZX_ERR_NOT_FOUND;
736    } else {
737        *ns = fdio_root_ns;
738    }
739    mtx_unlock(&fdio_lock);
740    return status;
741}
742
743__EXPORT
744zx_status_t fdio_clone_cwd(zx_handle_t* handles, uint32_t* types) {
745    return fdio_cwd_handle->ops->clone(fdio_cwd_handle, handles, types);
746}
747
748__EXPORT
749zx_status_t fdio_clone_fd(int fd, int newfd, zx_handle_t* handles, uint32_t* types) {
750    zx_status_t r;
751    fdio_t* io;
752    if ((io = fd_to_io(fd)) == NULL) {
753        return ZX_ERR_BAD_HANDLE;
754    }
755    // TODO(ZX-973): implement/honor close-on-exec flag
756    if ((r = io->ops->clone(io, handles, types)) > 0) {
757        for (int i = 0; i < r; i++) {
758            types[i] |= (newfd << 16);
759        }
760    }
761    fdio_release(io);
762    return r;
763}
764
765__EXPORT
766zx_status_t fdio_transfer_fd(int fd, int newfd, zx_handle_t* handles, uint32_t* types) {
767    fdio_t* io;
768    zx_status_t status;
769    if ((status = fdio_unbind_from_fd(fd, &io)) < 0) {
770        return status;
771    }
772    status = io->ops->unwrap(io, handles, types);
773    fdio_release(io);
774    if (status < 0) {
775        return status;
776    }
777    for (int n = 0; n < status; n++) {
778        types[n] |= (newfd << 16);
779    }
780    return status;
781}
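
// Note on the (newfd << 16) in fdio_clone_fd() and fdio_transfer_fd(): the
// returned types are PA_FDIO_* values, and PA_HND_ARG() occupies the upper 16
// bits of the handle info word, so (assuming PA_HND's layout) this is
// equivalent to tagging each handle as PA_HND(PA_FDIO_*, newfd). A receiving
// process's __libc_extensions_init() (above) then installs the handle at fd
// newfd.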
782
783__EXPORT
784ssize_t fdio_ioctl(int fd, int op, const void* in_buf, size_t in_len, void* out_buf, size_t out_len) {
785    fdio_t* io;
786    if ((io = fd_to_io(fd)) == NULL) {
787        return ZX_ERR_BAD_HANDLE;
788    }
789    ssize_t r = io->ops->ioctl(io, op, in_buf, in_len, out_buf, out_len);
790    fdio_release(io);
791    return r;
792}
793
794zx_status_t fdio_wait(fdio_t* io, uint32_t events, zx_time_t deadline,
795                      uint32_t* out_pending) {
796    zx_handle_t h = ZX_HANDLE_INVALID;
797    zx_signals_t signals = 0;
798    io->ops->wait_begin(io, events, &h, &signals);
799    if (h == ZX_HANDLE_INVALID)
800        // Wait operation is not applicable to the handle.
801        return ZX_ERR_INVALID_ARGS;
802
803    zx_signals_t pending;
804    zx_status_t status = zx_object_wait_one(h, signals, deadline, &pending);
805    if (status == ZX_OK || status == ZX_ERR_TIMED_OUT) {
806        io->ops->wait_end(io, pending, &events);
807        if (out_pending != NULL)
808            *out_pending = events;
809    }
810
811    return status;
812}
813
814__EXPORT
815zx_status_t fdio_wait_fd(int fd, uint32_t events, uint32_t* _pending, zx_time_t deadline) {
816    fdio_t* io = fd_to_io(fd);
817    if (io == NULL)
818        return ZX_ERR_BAD_HANDLE;
819
820    zx_status_t status = fdio_wait(io, events, deadline, _pending);
821
822    fdio_release(io);
823    return status;
824}
825
826static zx_status_t fdio_stat(fdio_t* io, struct stat* s) {
827    vnattr_t attr;
828    zx_status_t status = io->ops->get_attr(io, &attr);
829    if (status != ZX_OK) {
830        return status;
831    }
832    memset(s, 0, sizeof(struct stat));
833    s->st_mode = attr.mode;
834    s->st_ino = attr.inode;
835    s->st_size = attr.size;
836    s->st_blksize = attr.blksize;
837    s->st_blocks = attr.blkcount;
838    s->st_nlink = attr.nlink;
839    s->st_ctim.tv_sec = attr.create_time / ZX_SEC(1);
840    s->st_ctim.tv_nsec = attr.create_time % ZX_SEC(1);
841    s->st_mtim.tv_sec = attr.modify_time / ZX_SEC(1);
842    s->st_mtim.tv_nsec = attr.modify_time % ZX_SEC(1);
843    return ZX_OK;
844}
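
// For example, a create_time of 1500000000123456789 ns splits into
//   st_ctim.tv_sec  == 1500000000
//   st_ctim.tv_nsec == 123456789
// since ZX_SEC(1) is one second expressed in nanoseconds.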
845
846// TODO(ZX-974): determine complete correct mapping
847int fdio_status_to_errno(zx_status_t status) {
848    switch (status) {
849    case ZX_ERR_NOT_FOUND: return ENOENT;
850    case ZX_ERR_NO_MEMORY: return ENOMEM;
851    case ZX_ERR_INVALID_ARGS: return EINVAL;
852    case ZX_ERR_BUFFER_TOO_SMALL: return EINVAL;
853    case ZX_ERR_TIMED_OUT: return ETIMEDOUT;
854    case ZX_ERR_UNAVAILABLE: return EBUSY;
855    case ZX_ERR_ALREADY_EXISTS: return EEXIST;
856    case ZX_ERR_PEER_CLOSED: return EPIPE;
857    case ZX_ERR_BAD_STATE: return EPIPE;
858    case ZX_ERR_BAD_PATH: return ENAMETOOLONG;
859    case ZX_ERR_IO: return EIO;
860    case ZX_ERR_NOT_FILE: return EISDIR;
861    case ZX_ERR_NOT_DIR: return ENOTDIR;
862    case ZX_ERR_NOT_SUPPORTED: return ENOTSUP;
863    case ZX_ERR_OUT_OF_RANGE: return EINVAL;
864    case ZX_ERR_NO_RESOURCES: return ENOMEM;
865    case ZX_ERR_BAD_HANDLE: return EBADF;
866    case ZX_ERR_ACCESS_DENIED: return EACCES;
867    case ZX_ERR_SHOULD_WAIT: return EAGAIN;
868    case ZX_ERR_FILE_BIG: return EFBIG;
869    case ZX_ERR_NO_SPACE: return ENOSPC;
870    case ZX_ERR_NOT_EMPTY: return ENOTEMPTY;
871    case ZX_ERR_IO_REFUSED: return ECONNREFUSED;
872    case ZX_ERR_IO_INVALID: return EIO;
873    case ZX_ERR_CANCELED: return EBADF;
874    case ZX_ERR_PROTOCOL_NOT_SUPPORTED: return EPROTONOSUPPORT;
875    case ZX_ERR_ADDRESS_UNREACHABLE: return ENETUNREACH;
876    case ZX_ERR_ADDRESS_IN_USE: return EADDRINUSE;
877    case ZX_ERR_NOT_CONNECTED: return ENOTCONN;
878    case ZX_ERR_CONNECTION_REFUSED: return ECONNREFUSED;
879    case ZX_ERR_CONNECTION_RESET: return ECONNRESET;
880    case ZX_ERR_CONNECTION_ABORTED: return ECONNABORTED;
881
882    // No specific translation, so return a generic errno value.
883    default: return EIO;
884    }
885}
886
887// The functions from here on provide implementations of fd and path
888// centric posix-y io operations.
889
890__EXPORT
891ssize_t readv(int fd, const struct iovec* iov, int num) {
892    ssize_t count = 0;
893    ssize_t r;
894    while (num > 0) {
895        if (iov->iov_len != 0) {
896            r = read(fd, iov->iov_base, iov->iov_len);
897            if (r < 0) {
898                return count ? count : r;
899            }
900            if ((size_t)r < iov->iov_len) {
901                return count + r;
902            }
903            count += r;
904        }
905        iov++;
906        num--;
907    }
908    return count;
909}
910
911__EXPORT
912ssize_t writev(int fd, const struct iovec* iov, int num) {
913    ssize_t count = 0;
914    ssize_t r;
915    while (num > 0) {
916        if (iov->iov_len != 0) {
917            r = write(fd, iov->iov_base, iov->iov_len);
918            if (r < 0) {
919                return count ? count : r;
920            }
921            if ((size_t)r < iov->iov_len) {
922                return count + r;
923            }
924            count += r;
925        }
926        iov++;
927        num--;
928    }
929    return count;
930}
931
932__EXPORT
933zx_status_t _mmap_file(size_t offset, size_t len, zx_vm_option_t zx_options, int flags, int fd,
934                       off_t fd_off, uintptr_t* out) {
935    fdio_t* io;
936    if ((io = fd_to_io(fd)) == NULL) {
937        return ZX_ERR_BAD_HANDLE;
938    }
939
940    int vflags = zx_options | (flags & MAP_PRIVATE ? FDIO_MMAP_FLAG_PRIVATE : 0);
941    zx_handle_t vmo;
942    zx_status_t r = io->ops->get_vmo(io, vflags, &vmo);
943    fdio_release(io);
944    if (r < 0) {
945        return r;
946    }
947
948    uintptr_t ptr = 0;
949    r = zx_vmar_map(zx_vmar_root_self(), zx_options, offset, vmo, fd_off, len, &ptr);
950    zx_handle_close(vmo);
951    // TODO: map this as shared if we ever implement forking
952    if (r < 0) {
953        return r;
954    }
955
956    *out = ptr;
957    return ZX_OK;
958}
959
960__EXPORT
961int unlinkat(int dirfd, const char* path, int flags) {
962    char name[NAME_MAX + 1];
963    fdio_t* io;
964    zx_status_t r;
965    if ((r = __fdio_opendir_containing_at(&io, dirfd, path, name)) < 0) {
966        return ERROR(r);
967    }
968    r = io->ops->unlink(io, name, strlen(name));
969    io->ops->close(io);
970    fdio_release(io);
971    return STATUS(r);
972}
973
974__EXPORT
975ssize_t read(int fd, void* buf, size_t count) {
976    if (buf == NULL && count > 0) {
977        return ERRNO(EINVAL);
978    }
979
980    fdio_t* io = fd_to_io(fd);
981    if (io == NULL) {
982        return ERRNO(EBADF);
983    }
984    zx_status_t status;
985    for (;;) {
986        status = io->ops->read(io, buf, count);
        if ((status != ZX_ERR_SHOULD_WAIT) || (io->ioflag & IOFLAG_NONBLOCK)) {
988            break;
989        }
990        fdio_wait_fd(fd, FDIO_EVT_READABLE | FDIO_EVT_PEER_CLOSED, NULL, ZX_TIME_INFINITE);
991    }
992    fdio_release(io);
993    return status < 0 ? STATUS(status) : status;
994}
995
996__EXPORT
997ssize_t write(int fd, const void* buf, size_t count) {
998    if (buf == NULL && count > 0) {
999        return ERRNO(EINVAL);
1000    }
1001
1002    fdio_t* io = fd_to_io(fd);
1003    if (io == NULL) {
1004        return ERRNO(EBADF);
1005    }
1006    zx_status_t status;
1007    for (;;) {
1008        status = io->ops->write(io, buf, count);
1009        if ((status != ZX_ERR_SHOULD_WAIT) || (io->ioflag & IOFLAG_NONBLOCK)) {
1010            break;
1011        }
1012        fdio_wait_fd(fd, FDIO_EVT_WRITABLE | FDIO_EVT_PEER_CLOSED, NULL, ZX_TIME_INFINITE);
1013    }
1014    fdio_release(io);
1015    return status < 0 ? STATUS(status) : status;
1016}
1017
1018__EXPORT
1019ssize_t preadv(int fd, const struct iovec* iov, int count, off_t ofs) {
1020    ssize_t iov_count = 0;
1021    ssize_t r;
1022    while (count > 0) {
1023        if (iov->iov_len != 0) {
1024            r = pread(fd, iov->iov_base, iov->iov_len, ofs);
1025            if (r < 0) {
1026                return iov_count ? iov_count : r;
1027            }
1028            if ((size_t)r < iov->iov_len) {
1029                return iov_count + r;
1030            }
1031            iov_count += r;
1032            ofs += r;
1033        }
1034        iov++;
1035        count--;
1036    }
1037    return iov_count;
1038}
1039
1040__EXPORT
1041ssize_t pread(int fd, void* buf, size_t size, off_t ofs) {
1042    if (buf == NULL && size > 0) {
1043        return ERRNO(EINVAL);
1044    }
1045
1046    fdio_t* io = fd_to_io(fd);
1047    if (io == NULL) {
1048        return ERRNO(EBADF);
1049    }
1050    zx_status_t status;
1051    for (;;) {
1052        status = io->ops->read_at(io, buf, size, ofs);
1053        if ((status != ZX_ERR_SHOULD_WAIT) || (io->ioflag & IOFLAG_NONBLOCK)) {
1054            break;
1055        }
1056        fdio_wait_fd(fd, FDIO_EVT_READABLE | FDIO_EVT_PEER_CLOSED, NULL, ZX_TIME_INFINITE);
1057    }
1058    fdio_release(io);
1059    return status < 0 ? STATUS(status) : status;
1060}
1061
1062__EXPORT
1063ssize_t pwritev(int fd, const struct iovec* iov, int count, off_t ofs) {
1064    ssize_t iov_count = 0;
1065    ssize_t r;
1066    while (count > 0) {
1067        if (iov->iov_len != 0) {
1068            r = pwrite(fd, iov->iov_base, iov->iov_len, ofs);
1069            if (r < 0) {
1070                return iov_count ? iov_count : r;
1071            }
1072            if ((size_t)r < iov->iov_len) {
1073                return iov_count + r;
1074            }
1075            iov_count += r;
1076            ofs += r;
1077        }
1078        iov++;
1079        count--;
1080    }
1081    return iov_count;
1082}
1083
1084__EXPORT
1085ssize_t pwrite(int fd, const void* buf, size_t size, off_t ofs) {
1086    if (buf == NULL && size > 0) {
1087        return ERRNO(EINVAL);
1088    }
1089
1090    fdio_t* io = fd_to_io(fd);
1091    if (io == NULL) {
1092        return ERRNO(EBADF);
1093    }
1094    zx_status_t status;
1095    for (;;) {
1096        status = io->ops->write_at(io, buf, size, ofs);
1097        if ((status != ZX_ERR_SHOULD_WAIT) || (io->ioflag & IOFLAG_NONBLOCK)) {
1098            break;
1099        }
1100        fdio_wait_fd(fd, FDIO_EVT_WRITABLE | FDIO_EVT_PEER_CLOSED, NULL, ZX_TIME_INFINITE);
1101    }
1102    fdio_release(io);
1103    return status < 0 ? STATUS(status) : status;
1104}
1105
1106__EXPORT
1107int close(int fd) {
1108    mtx_lock(&fdio_lock);
1109    if ((fd < 0) || (fd >= FDIO_MAX_FD) || (fdio_fdtab[fd] == NULL)) {
1110        mtx_unlock(&fdio_lock);
1111        return ERRNO(EBADF);
1112    }
1113    fdio_t* io = fdio_fdtab[fd];
1114    io->dupcount--;
1115    fdio_fdtab[fd] = NULL;
    LOG(1, "fdio: close(%d) dupcount=%u\n", fd, io->dupcount);
1117    if (io->dupcount > 0) {
1118        // still alive in other fdtab slots
1119        mtx_unlock(&fdio_lock);
1120        fdio_release(io);
1121        return ZX_OK;
1122    } else {
1123        mtx_unlock(&fdio_lock);
1124        int r = io->ops->close(io);
1125        fdio_release(io);
1126        return STATUS(r);
1127    }
1128}
1129
1130static int fdio_dup(int oldfd, int newfd, int starting_fd) {
1131    fdio_t* io = fd_to_io(oldfd);
1132    if (io == NULL) {
1133        return ERRNO(EBADF);
1134    }
1135    int fd = fdio_bind_to_fd(io, newfd, starting_fd);
1136    if (fd < 0) {
1137        fdio_release(io);
1138    }
1139    return fd;
1140}
1141
1142__EXPORT
1143int dup2(int oldfd, int newfd) {
1144    return fdio_dup(oldfd, newfd, 0);
1145}
1146
1147__EXPORT
1148int dup(int oldfd) {
1149    return fdio_dup(oldfd, -1, 0);
1150}
1151
1152__EXPORT
1153int dup3(int oldfd, int newfd, int flags) {
    // dup3 differs from dup2 in that it fails with EINVAL, rather
    // than being a no-op, when the same fd is passed for both old
    // and new.
1157    if (oldfd == newfd) {
1158        return ERRNO(EINVAL);
1159    }
1160
1161    if (flags != 0 && flags != O_CLOEXEC) {
1162        return ERRNO(EINVAL);
1163    }
1164
1165    // TODO(ZX-973) Implement O_CLOEXEC.
1166    return fdio_dup(oldfd, newfd, 0);
1167}
1168
1169__EXPORT
1170int fcntl(int fd, int cmd, ...) {
// Note that it is not safe to pull the int out of the
// variadic arguments at the top level, as callers are not
// required to pass anything for many of the commands.
1174#define GET_INT_ARG(ARG)         \
1175    va_list args;                \
1176    va_start(args, cmd);         \
1177    int ARG = va_arg(args, int); \
1178    va_end(args)
1179
1180    switch (cmd) {
1181    case F_DUPFD:
1182    case F_DUPFD_CLOEXEC: {
1183        // TODO(ZX-973) Implement CLOEXEC.
1184        GET_INT_ARG(starting_fd);
1185        return fdio_dup(fd, -1, starting_fd);
1186    }
1187    case F_GETFD: {
1188        fdio_t* io = fd_to_io(fd);
1189        if (io == NULL) {
1190            return ERRNO(EBADF);
1191        }
1192        int flags = (int)(io->ioflag & IOFLAG_FD_FLAGS);
1193        // POSIX mandates that the return value be nonnegative if successful.
1194        assert(flags >= 0);
1195        fdio_release(io);
1196        return flags;
1197    }
1198    case F_SETFD: {
1199        fdio_t* io = fd_to_io(fd);
1200        if (io == NULL) {
1201            return ERRNO(EBADF);
1202        }
1203        GET_INT_ARG(flags);
1204        // TODO(ZX-973) Implement CLOEXEC.
1205        io->ioflag &= ~IOFLAG_FD_FLAGS;
1206        io->ioflag |= (uint32_t)flags & IOFLAG_FD_FLAGS;
1207        fdio_release(io);
1208        return 0;
1209    }
1210    case F_GETFL: {
1211        fdio_t* io = fd_to_io(fd);
1212        if (io == NULL) {
1213            return ERRNO(EBADF);
1214        }
1215        uint32_t flags = 0;
1216        zx_status_t r = io->ops->get_flags(io, &flags);
1217        if (r == ZX_ERR_NOT_SUPPORTED) {
1218            // We treat this as non-fatal, as it's valid for a remote to
1219            // simply not support FCNTL, but we still want to correctly
1220            // report the state of the (local) NONBLOCK flag
1221            flags = 0;
1222            r = ZX_OK;
1223        }
1224        flags = zxio_flags_to_fdio(flags);
1225        if (io->ioflag & IOFLAG_NONBLOCK) {
1226            flags |= O_NONBLOCK;
1227        }
1228        fdio_release(io);
1229        if (r < 0) {
1230            return STATUS(r);
1231        }
1232        return flags;
1233    }
1234    case F_SETFL: {
1235        fdio_t* io = fd_to_io(fd);
1236        if (io == NULL) {
1237            return ERRNO(EBADF);
1238        }
1239        GET_INT_ARG(n);
1240
1241        zx_status_t r;
1242        uint32_t flags = fdio_flags_to_zxio(n & ~O_NONBLOCK);
1243        r = io->ops->set_flags(io, flags);
1244
1245        // Some remotes don't support setting flags; we
1246        // can adjust their local flags anyway if NONBLOCK
1247        // is the only bit being toggled.
1248        if (r == ZX_ERR_NOT_SUPPORTED && ((n | O_NONBLOCK) == O_NONBLOCK)) {
1249            r = ZX_OK;
1250        }
1251
1252        if (r != ZX_OK) {
1253            n = STATUS(r);
1254        } else {
1255            if (n & O_NONBLOCK) {
1256                io->ioflag |= IOFLAG_NONBLOCK;
1257            } else {
1258                io->ioflag &= ~IOFLAG_NONBLOCK;
1259            }
1260            n = 0;
1261        }
1262        fdio_release(io);
1263        return n;
1264    }
1265    case F_GETOWN:
1266    case F_SETOWN:
1267        // TODO(kulakowski) Socket support.
1268        return ERRNO(ENOSYS);
1269    case F_GETLK:
1270    case F_SETLK:
1271    case F_SETLKW:
1272        // TODO(kulakowski) Advisory file locking support.
1273        return ERRNO(ENOSYS);
1274    default:
1275        return ERRNO(EINVAL);
1276    }
1277
1278#undef GET_INT_ARG
1279}
1280
1281__EXPORT
1282off_t lseek(int fd, off_t offset, int whence) {
1283    fdio_t* io = fd_to_io(fd);
1284    if (io == NULL) {
1285        return ERRNO(EBADF);
1286    }
1287    off_t r = io->ops->seek(io, offset, whence);
1288    if (r == ZX_ERR_WRONG_TYPE) {
1289        // Although 'ESPIPE' is a bit of a misnomer, it is the valid errno
1290        // for any fd which does not implement seeking (i.e., for pipes,
1291        // sockets, etc).
1292        errno = ESPIPE;
1293        r = -1;
1294    } else if (r < 0) {
1295        r = ERROR(r);
1296    }
1297    fdio_release(io);
1298    return r;
1299}
1300
1301static int getdirents(int fd, void* ptr, size_t len, long cmd) {
1302    size_t actual;
1303    zx_status_t status;
1304    fdio_t* io = fd_to_io(fd);
1305    if (io == NULL) {
1306        return ERRNO(EBADF);
1307    }
1308    if (cmd == READDIR_CMD_RESET) {
1309        if ((status = io->ops->rewind(io)) != ZX_OK) {
1310            goto done;
1311        }
1312    }
1313    if ((status = io->ops->readdir(io, ptr, len, &actual)) != ZX_OK) {
1314        goto done;
1315    }
1316
1317done:
1318    fdio_release(io);
1319    return status == ZX_OK ? (int) actual : ERROR(status);
1320}
1321
1322static int truncateat(int dirfd, const char* path, off_t len) {
1323    fdio_t* io;
1324    zx_status_t r;
1325
1326    if ((r = __fdio_open_at(&io, dirfd, path, O_WRONLY, 0)) < 0) {
1327        return ERROR(r);
1328    }
1329    r = io->ops->truncate(io, len);
1330    fdio_close(io);
1331    fdio_release(io);
1332    return STATUS(r);
1333}
1334
1335__EXPORT
1336int truncate(const char* path, off_t len) {
1337    return truncateat(AT_FDCWD, path, len);
1338}
1339
1340__EXPORT
1341int ftruncate(int fd, off_t len) {
1342    fdio_t* io = fd_to_io(fd);
1343    if (io == NULL) {
1344        return ERRNO(EBADF);
1345    }
1346
1347    zx_status_t r = io->ops->truncate(io, len);
1348    fdio_release(io);
1349    return STATUS(r);
1350}
1351
1352// Filesystem operations (such as rename and link) which act on multiple paths
1353// have some additional complexity on Zircon. These operations (eventually) act
1354// on two pairs of variables: a source parent vnode + name, and a target parent
1355// vnode + name. However, the loose coupling of these pairs can make their
1356// correspondence difficult, especially when accessing each parent vnode may
1357// involve crossing various filesystem boundaries.
1358//
1359// To resolve this problem, these kinds of operations involve:
1360// - Opening the source parent vnode directly.
1361// - Opening the target parent vnode directly, + acquiring a "vnode token".
1362// - Sending the real operation + names to the source parent vnode, along with
1363//   the "vnode token" representing the target parent vnode.
1364//
1365// Using zircon kernel primitives (cookies) to authenticate the vnode token, this
1366// allows these multi-path operations to mix absolute / relative paths and cross
1367// mount points with ease.
1368static int two_path_op_at(uint32_t op, int olddirfd, const char* oldpath,
1369                          int newdirfd, const char* newpath) {
1370    char oldname[NAME_MAX + 1];
1371    fdio_t* io_oldparent;
1372    zx_status_t status = ZX_OK;
1373    if ((status = __fdio_opendir_containing_at(&io_oldparent, olddirfd, oldpath, oldname)) < 0) {
1374        return ERROR(status);
1375    }
1376
1377    char newname[NAME_MAX + 1];
1378    fdio_t* io_newparent;
1379    if ((status = __fdio_opendir_containing_at(&io_newparent, newdirfd, newpath, newname)) < 0) {
1380        goto oldparent_open;
1381    }
1382
1383    zx_handle_t token;
1384    status = io_newparent->ops->get_token(io_newparent, &token);
1385    if (status < 0) {
1386        goto newparent_open;
1387    }
1388
1389    if (op == fuchsia_io_DirectoryRenameOrdinal) {
1390        status = io_oldparent->ops->rename(io_oldparent, oldname,
1391                                           strlen(oldname), token, newname,
1392                                           strlen(newname));
1393    } else if (op == fuchsia_io_DirectoryLinkOrdinal) {
1394        status = io_oldparent->ops->link(io_oldparent, oldname, strlen(oldname),
1395                                         token, newname, strlen(newname));
1396    } else {
1397        zx_handle_close(token);
1398        status = ZX_ERR_NOT_SUPPORTED;
1399    }
1400newparent_open:
1401    io_newparent->ops->close(io_newparent);
1402    fdio_release(io_newparent);
1403oldparent_open:
1404    io_oldparent->ops->close(io_oldparent);
1405    fdio_release(io_oldparent);
1406    return STATUS(status);
1407}
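
// Illustrative walk-through of the scheme above, for rename("/data/a", "/tmp/b"):
//   1. "/data" is opened as io_oldparent and oldname is "a".
//   2. "/tmp" is opened as io_newparent, newname is "b", and a vnode token for
//      io_newparent is acquired via get_token().
//   3. The Rename request ("a", token, "b") is sent to io_oldparent, which can
//      validate the token even when "/tmp" is served by a different filesystem.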
1408
1409__EXPORT
1410int renameat(int olddirfd, const char* oldpath, int newdirfd, const char* newpath) {
1411    return two_path_op_at(fuchsia_io_DirectoryRenameOrdinal, olddirfd, oldpath, newdirfd, newpath);
1412}
1413
1414__EXPORT
1415int rename(const char* oldpath, const char* newpath) {
1416    return two_path_op_at(fuchsia_io_DirectoryRenameOrdinal, AT_FDCWD, oldpath, AT_FDCWD, newpath);
1417}
1418
1419__EXPORT
1420int link(const char* oldpath, const char* newpath) {
1421    return two_path_op_at(fuchsia_io_DirectoryLinkOrdinal, AT_FDCWD, oldpath, AT_FDCWD, newpath);
1422}
1423
1424__EXPORT
1425int unlink(const char* path) {
1426    return unlinkat(AT_FDCWD, path, 0);
1427}
1428
1429static int vopenat(int dirfd, const char* path, int flags, va_list args) {
1430    fdio_t* io = NULL;
1431    zx_status_t r;
1432    int fd;
1433    uint32_t mode = 0;
1434
1435    if (flags & O_CREAT) {
1436        if (flags & O_DIRECTORY) {
1437            // The behavior of open with O_CREAT | O_DIRECTORY is underspecified
1438            // in POSIX. To help avoid programmer error, we explicitly disallow
1439            // the combination.
1440            return ERRNO(EINVAL);
1441        }
1442        mode = va_arg(args, uint32_t) & 0777;
1443    }
1444    if ((r = __fdio_open_at(&io, dirfd, path, flags, mode)) < 0) {
1445        return ERROR(r);
1446    }
1447    if (flags & O_NONBLOCK) {
1448        io->ioflag |= IOFLAG_NONBLOCK;
1449    }
1450    if ((fd = fdio_bind_to_fd(io, -1, 0)) < 0) {
1451        io->ops->close(io);
1452        fdio_release(io);
1453        return ERRNO(EMFILE);
1454    }
1455    return fd;
1456}
1457
1458__EXPORT
1459int open(const char* path, int flags, ...) {
1460    va_list ap;
1461    va_start(ap, flags);
1462    int ret = vopenat(AT_FDCWD, path, flags, ap);
1463    va_end(ap);
1464    return ret;
1465}
1466
1467__EXPORT
1468int openat(int dirfd, const char* path, int flags, ...) {
1469    va_list ap;
1470    va_start(ap, flags);
1471    int ret = vopenat(dirfd, path, flags, ap);
1472    va_end(ap);
1473    return ret;
1474}
1475
1476__EXPORT
1477int mkdir(const char* path, mode_t mode) {
1478    return mkdirat(AT_FDCWD, path, mode);
1479}
1480
1481__EXPORT
1482int mkdirat(int dirfd, const char* path, mode_t mode) {
1483    fdio_t* io = NULL;
1484    zx_status_t r;
1485
1486    mode = (mode & 0777) | S_IFDIR;
1487
1488    if ((r = __fdio_open_at(&io, dirfd, path, O_RDONLY | O_CREAT | O_EXCL, mode)) < 0) {
1489        return ERROR(r);
1490    }
1491    io->ops->close(io);
1492    fdio_release(io);
1493    return 0;
1494}
1495
1496__EXPORT
1497int fsync(int fd) {
1498    fdio_t* io = fd_to_io(fd);
1499    if (io == NULL) {
1500        return ERRNO(EBADF);
1501    }
1502    zx_status_t r = io->ops->sync(io);
1503    fdio_release(io);
1504    return STATUS(r);
1505}
1506
1507__EXPORT
1508int fdatasync(int fd) {
1509    // TODO(smklein): fdatasync does not need to flush metadata under certain
1510    // circumstances -- however, for now, this implementation will appear
1511    // functionally the same (if a little slower).
1512    return fsync(fd);
1513}
1514
1515__EXPORT
1516int syncfs(int fd) {
1517    // TODO(smklein): Currently, fsync syncs the entire filesystem, not just
1518    // the target file descriptor. These functions should use different sync
1519    // mechanisms, where fsync is more fine-grained.
1520    return fsync(fd);
1521}
1522
1523__EXPORT
1524int fstat(int fd, struct stat* s) {
1525    fdio_t* io = fd_to_io(fd);
1526    if (io == NULL) {
1527        return ERRNO(EBADF);
1528    }
1529    int r = STATUS(fdio_stat(io, s));
1530    fdio_release(io);
1531    return r;
1532}
1533
1534__EXPORT
1535int fstatat(int dirfd, const char* fn, struct stat* s, int flags) {
1536    fdio_t* io;
1537    zx_status_t r;
1538
1539    LOG(1,"fdio: fstatat(%d, '%s',...)\n", dirfd, fn);
1540    if ((r = __fdio_open_at(&io, dirfd, fn, O_PATH, 0)) < 0) {
1541        return ERROR(r);
1542    }
1543    LOG(1,"fdio: fstatat io=%p\n", io);
1544    r = fdio_stat(io, s);
1545    fdio_close(io);
1546    fdio_release(io);
1547    return STATUS(r);
1548}
1549
1550__EXPORT
1551int stat(const char* fn, struct stat* s) {
1552    return fstatat(AT_FDCWD, fn, s, 0);
1553}
1554
1555__EXPORT
1556int lstat(const char* path, struct stat* buf) {
1557    return stat(path, buf);
1558}
1559
1560__EXPORT
1561char* realpath(const char* restrict filename, char* restrict resolved) {
1562    ssize_t r;
1563    struct stat st;
1564    char tmp[PATH_MAX];
1565    size_t outlen;
1566    bool is_dir;
1567
1568    if (!filename) {
1569        errno = EINVAL;
1570        return NULL;
1571    }
1572
1573    if (filename[0] != '/') {
1574        // Convert 'filename' from a relative path to an absolute path.
1575        size_t file_len = strlen(filename);
1576        mtx_lock(&fdio_cwd_lock);
1577        size_t cwd_len = strlen(fdio_cwd_path);
1578        if (cwd_len + 1 + file_len >= PATH_MAX) {
1579            mtx_unlock(&fdio_cwd_lock);
1580            errno = ENAMETOOLONG;
1581            return NULL;
1582        }
1583        char tmp2[PATH_MAX];
1584        memcpy(tmp2, fdio_cwd_path, cwd_len);
1585        mtx_unlock(&fdio_cwd_lock);
1586        tmp2[cwd_len] = '/';
1587        strcpy(tmp2 + cwd_len + 1, filename);
1588        zx_status_t status = __fdio_cleanpath(tmp2, tmp, &outlen, &is_dir);
1589        if (status != ZX_OK) {
1590            errno = EINVAL;
1591            return NULL;
1592        }
1593    } else {
1594        // Clean the provided absolute path
1595        zx_status_t status = __fdio_cleanpath(filename, tmp, &outlen, &is_dir);
1596        if (status != ZX_OK) {
1597            errno = EINVAL;
1598            return NULL;
1599        }
1600
1601        r = stat(tmp, &st);
1602        if (r < 0) {
1603            return NULL;
1604        }
1605    }
1606    return resolved ? strcpy(resolved, tmp) : strdup(tmp);
1607}
1608
1609static zx_status_t zx_utimens(fdio_t* io, const struct timespec times[2],
1610                              int flags) {
1611    vnattr_t vn;
1612    vn.valid = 0;
1613
1614    // Extract modify time.
1615    vn.modify_time = (times == NULL || times[1].tv_nsec == UTIME_NOW)
1616        ? zx_clock_get(ZX_CLOCK_UTC)
1617        : zx_time_add_duration(ZX_SEC(times[1].tv_sec), times[1].tv_nsec);
1618
1619    if (times == NULL || times[1].tv_nsec != UTIME_OMIT) {
1620        // For setattr, tell which fields are valid.
1621        vn.valid = ATTR_MTIME;
1622    }
1623
1624    // set time(s) on underlying object
1625    return io->ops->set_attr(io, &vn);
1626}
1627
1628__EXPORT
1629int utimensat(int dirfd, const char *fn,
1630              const struct timespec times[2], int flags) {
1631    fdio_t* io;
1632    zx_status_t r;
1633
1634    // TODO(orr): AT_SYMLINK_NOFOLLOW
1635    if ((flags & AT_SYMLINK_NOFOLLOW) != 0) {
1636        // Allow this flag - don't return an error.  Fuchsia does not support
1637        // symlinks, so don't break utilities (like tar) that use this flag.
1638    }
1639
1640    if ((r = __fdio_open_at(&io, dirfd, fn, 0, 0)) < 0) {
1641        return ERROR(r);
1642    }
1643
1644    r = zx_utimens(io, times, 0);
1645
1646    fdio_close(io);
1647    fdio_release(io);
1648    return STATUS(r);
1649}
1650
1651__EXPORT
1652int futimens(int fd, const struct timespec times[2]) {
    fdio_t* io = fd_to_io(fd);
    if (io == NULL) {
        return ERRNO(EBADF);
    }
    zx_status_t r = zx_utimens(io, times, 0);
1655    fdio_release(io);
1656    return STATUS(r);
1657}
1658
1659__EXPORT
1660int pipe2(int pipefd[2], int flags) {
1661    const int allowed_flags = O_NONBLOCK | O_CLOEXEC;
1662    if (flags & ~allowed_flags) {
1663        return ERRNO(EINVAL);
1664    }
1665    fdio_t *a, *b;
1666    int r = fdio_pipe_pair(&a, &b);
1667    if (r < 0) {
1668        return ERROR(r);
1669    }
1670    pipefd[0] = fdio_bind_to_fd(a, -1, 0);
1671    if (pipefd[0] < 0) {
1672        int errno_ = errno;
1673        fdio_close(a);
1674        fdio_release(a);
1675        fdio_close(b);
1676        fdio_release(b);
1677        return ERRNO(errno_);
1678    }
1679    pipefd[1] = fdio_bind_to_fd(b, -1, 0);
1680    if (pipefd[1] < 0) {
1681        int errno_ = errno;
1682        close(pipefd[0]);
1683        fdio_close(b);
1684        fdio_release(b);
1685        return ERRNO(errno_);
1686    }
1687    return 0;
1688}
1689
1690__EXPORT
1691int pipe(int pipefd[2]) {
1692    return pipe2(pipefd, 0);
1693}
1694
1695__EXPORT
1696int socketpair(int domain, int type, int protocol, int fd[2]) {
1697    if (type != SOCK_STREAM) {  // TODO(jamesr): SOCK_DGRAM
1698        errno = EPROTOTYPE;
1699        return -1;
1700    }
1701    if (domain != AF_UNIX) {
1702        errno = EAFNOSUPPORT;
1703        return -1;
1704    }
1705    if (protocol != 0) {
1706        errno = EPROTONOSUPPORT;
1707        return -1;
1708    }
1709
1710    return pipe(fd);
1711}

__EXPORT
int faccessat(int dirfd, const char* filename, int amode, int flag) {
    // For now, we just check to see if the file exists, until we
    // model permissions. But first, check that the flags and amode
    // are valid.
    const int allowed_flags = AT_EACCESS;
    if (flag & (~allowed_flags)) {
        return ERRNO(EINVAL);
    }

    // amode is allowed to be either a subset of this mask, or just F_OK.
    const int allowed_modes = R_OK | W_OK | X_OK;
    if (amode != F_OK && (amode & (~allowed_modes))) {
        return ERRNO(EINVAL);
    }

    // Since we are not tracking permissions yet, just check that the
    // file exists a la fstatat.
    fdio_t* io;
    zx_status_t status;
    if ((status = __fdio_open_at(&io, dirfd, filename, 0, 0)) < 0) {
        return ERROR(status);
    }
    struct stat s;
    status = fdio_stat(io, &s);
    fdio_close(io);
    fdio_release(io);
    return STATUS(status);
}
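
// Illustrative sketch (kept out of the build with #if 0, not part of fdio):
// since permissions are not modeled yet, the faccessat() above reduces to an
// existence check, so F_OK and R_OK | W_OK currently behave the same.
// example_exists is a hypothetical helper name.
#if 0
static bool example_exists(const char* path) {
    return faccessat(AT_FDCWD, path, F_OK, 0) == 0;
}
#endif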

__EXPORT
char* getcwd(char* buf, size_t size) {
    char tmp[PATH_MAX];
    if (buf == NULL) {
        buf = tmp;
        size = PATH_MAX;
    } else if (size == 0) {
        errno = EINVAL;
        return NULL;
    }

    char* out = NULL;
    mtx_lock(&fdio_cwd_lock);
    size_t len = strlen(fdio_cwd_path) + 1;
    if (len <= size) {
        memcpy(buf, fdio_cwd_path, len);
        out = buf;
    } else {
        errno = ERANGE;
    }
    mtx_unlock(&fdio_cwd_lock);

    if (out == tmp) {
        out = strdup(tmp);
    }
    return out;
}
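
// Illustrative sketch (kept out of the build with #if 0, not part of fdio):
// both calling conventions supported by the getcwd() above -- a caller
// supplied buffer, or buf == NULL, in which case a heap copy is returned and
// must be free()d by the caller.  example_getcwd is a hypothetical helper.
#if 0
static void example_getcwd(void) {
    char fixed[PATH_MAX];
    if (getcwd(fixed, sizeof(fixed)) != NULL) {
        // 'fixed' now holds the cwd; on failure errno is ERANGE or EINVAL.
    }
    char* heap = getcwd(NULL, 0);  // size is ignored when buf is NULL
    if (heap != NULL) {
        free(heap);
    }
}
#endif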

void fdio_chdir(fdio_t* io, const char* path) {
    mtx_lock(&fdio_cwd_lock);
    update_cwd_path(path);
    mtx_lock(&fdio_lock);
    fdio_t* old = fdio_cwd_handle;
    fdio_cwd_handle = io;
    old->ops->close(old);
    fdio_release(old);
    mtx_unlock(&fdio_lock);
    mtx_unlock(&fdio_cwd_lock);
}

__EXPORT
int chdir(const char* path) {
    fdio_t* io;
    zx_status_t r;
    if ((r = __fdio_open(&io, path, O_RDONLY | O_DIRECTORY, 0)) < 0) {
        return STATUS(r);
    }
    fdio_chdir(io, path);
    return 0;
}

#define DIR_BUFSIZE 2048

struct __dirstream {
    mtx_t lock;
    int fd;
    // Number of bytes of 'data' currently filled with dirents
    size_t size;
    // Pointer to the next dirent within 'data'. NULL requests a lazy
    // reset of the directory on the next call to getdirents.
    uint8_t* ptr;
    // Internal cache of dirents
    uint8_t data[DIR_BUFSIZE];
    // Buffer returned to user
    struct dirent de;
};

static DIR* internal_opendir(int fd) {
    DIR* dir = calloc(1, sizeof(*dir));
    if (dir != NULL) {
        mtx_init(&dir->lock, mtx_plain);
        dir->size = 0;
        dir->fd = fd;
    }
    return dir;
}

__EXPORT
DIR* opendir(const char* name) {
    int fd = open(name, O_RDONLY | O_DIRECTORY);
    if (fd < 0)
        return NULL;
    DIR* dir = internal_opendir(fd);
    if (dir == NULL)
        close(fd);
    return dir;
}

__EXPORT
DIR* fdopendir(int fd) {
    // Check the fd for validity, but we'll just store the fd
    // number so we don't save the fdio_t pointer.
    fdio_t* io = fd_to_io(fd);
    if (io == NULL) {
        errno = EBADF;
        return NULL;
    }
    // TODO(mcgrathr): Technically this should verify that it's
    // really a directory and fail with ENOTDIR if not.  But
    // that's not so easy to do, so don't bother for now.
    fdio_release(io);
    return internal_opendir(fd);
}

__EXPORT
int closedir(DIR* dir) {
    close(dir->fd);
    free(dir);
    return 0;
}

__EXPORT
struct dirent* readdir(DIR* dir) {
    mtx_lock(&dir->lock);
    struct dirent* de = &dir->de;
    for (;;) {
        if (dir->size >= sizeof(vdirent_t)) {
            vdirent_t* vde = (void*)dir->ptr;

            if (dir->size < vde->size + sizeof(vdirent_t)) {
                // This buffer is corrupted (not large enough to hold a name).
                // Reset it.
                dir->ptr = NULL;
                dir->size = 0;
                break;
            }

            dir->ptr += vde->size + sizeof(vdirent_t);
            dir->size -= vde->size + sizeof(vdirent_t);

            if (vde->size == 0) {
                // Skip nameless entries.
                // (they may be generated by filtering filesystems)
                continue;
            }

            // The remaining portion of the buffer is large
            // enough to hold the dirent name.
            size_t namelen = vde->size;
            de->d_ino = vde->ino;
            de->d_off = 0;
            // The d_reclen field is nonstandard, but existing code
            // may expect it to be useful as an upper bound on the
            // length of the name.
            de->d_reclen = offsetof(struct dirent, d_name) + namelen + 1;
            de->d_type = vde->type;
            memcpy(de->d_name, vde->name, namelen);
            de->d_name[namelen] = '\0';
            break;
        }
        int64_t cmd = (dir->ptr == NULL) ? READDIR_CMD_RESET : READDIR_CMD_NONE;
        int r = getdirents(dir->fd, dir->data, DIR_BUFSIZE, cmd);
        if (r > 0) {
            dir->ptr = dir->data;
            dir->size = r;
            continue;
        }
        de = NULL;
        break;
    }
    mtx_unlock(&dir->lock);
    return de;
}

__EXPORT
void rewinddir(DIR* dir) {
    mtx_lock(&dir->lock);
    dir->size = 0;
    dir->ptr = NULL;
    mtx_unlock(&dir->lock);
}

__EXPORT
int dirfd(DIR* dir) {
    return dir->fd;
}
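
// Illustrative sketch (kept out of the build with #if 0, not part of fdio):
// the usual iteration pattern over the DIR wrapper defined above; readdir()
// refills its internal buffer from getdirents() as needed.
// example_count_entries is a hypothetical helper name.
#if 0
static size_t example_count_entries(const char* path) {
    DIR* dir = opendir(path);
    if (dir == NULL) {
        return 0;
    }
    size_t count = 0;
    struct dirent* de;
    while ((de = readdir(dir)) != NULL) {
        // de->d_name and de->d_type are valid until the next readdir() call.
        count++;
    }
    closedir(dir);
    return count;
}
#endif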

__EXPORT
int isatty(int fd) {
    fdio_t* io = fd_to_io(fd);
    if (io == NULL) {
        errno = EBADF;
        return 0;
    }

    int ret;
    // TODO(ZX-972)
    // For now, stdout etc. needs to be a tty for line buffering to
    // work. So let's pretend those are ttys but nothing else is.
    if (fd == 0 || fd == 1 || fd == 2) {
        ret = 1;
    } else {
        ret = 0;
        errno = ENOTTY;
    }

    fdio_release(io);

    return ret;
}

__EXPORT
mode_t umask(mode_t mask) {
    mode_t oldmask;
    mtx_lock(&fdio_lock);
    oldmask = __fdio_global_state.umask;
    __fdio_global_state.umask = mask & 0777;
    mtx_unlock(&fdio_lock);
    return oldmask;
}

__EXPORT
int fdio_handle_fd(zx_handle_t h, zx_signals_t signals_in, zx_signals_t signals_out,
                   bool shared_handle) {
    fdio_t* io = fdio_waitable_create(h, signals_in, signals_out, shared_handle);
    int fd = fdio_bind_to_fd(io, -1, 0);
    if (fd < 0) {
        fdio_close(io);
        fdio_release(io);
    }
    return fd;
}
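
// Illustrative sketch (kept out of the build with #if 0, not part of fdio):
// turning a raw kernel handle into a pollable fd.  This assumes the waitable
// fdio reports signals_in as POLLIN and signals_out as POLLOUT, and that with
// shared_handle == false the wrapper owns the handle; verify against
// fdio_waitable_create() before relying on it.  example_event_fd is a
// hypothetical helper name.
#if 0
static int example_event_fd(void) {
    zx_handle_t event;
    if (zx_event_create(0, &event) != ZX_OK) {
        return -1;
    }
    // POLLIN should be reported once ZX_USER_SIGNAL_0 is asserted on 'event'.
    return fdio_handle_fd(event, ZX_USER_SIGNAL_0, 0, false);
}
#endif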

// from fdio/private.h, to support message-loop integration

__EXPORT
void __fdio_wait_begin(fdio_t* io, uint32_t events,
                       zx_handle_t* handle_out, zx_signals_t* signals_out) {
    return io->ops->wait_begin(io, events, handle_out, signals_out);
}

__EXPORT
void __fdio_wait_end(fdio_t* io, zx_signals_t signals, uint32_t* events_out) {
    return io->ops->wait_end(io, signals, events_out);
}

__EXPORT
void __fdio_release(fdio_t* io) {
    fdio_release(io);
}
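
// Illustrative sketch (kept out of the build with #if 0, not part of fdio):
// how a message loop might pair the hooks above -- translate poll events to a
// (handle, signals) pair, wait on it, then translate the observed signals back
// into poll events.  example_wait_readable is a hypothetical helper name.
#if 0
static uint32_t example_wait_readable(int fd) {
    fdio_t* io = fd_to_io(fd);  // takes a reference on the fdio_t
    if (io == NULL) {
        return 0;
    }
    zx_handle_t h = ZX_HANDLE_INVALID;
    zx_signals_t sigs = 0;
    __fdio_wait_begin(io, POLLIN, &h, &sigs);
    if (h == ZX_HANDLE_INVALID) {
        // Waiting is not applicable to this fd.
        __fdio_release(io);
        return 0;
    }
    zx_signals_t observed = 0;
    zx_object_wait_one(h, sigs, ZX_TIME_INFINITE, &observed);
    uint32_t events = 0;
    __fdio_wait_end(io, observed, &events);
    __fdio_release(io);  // drop the reference taken by fd_to_io
    return events;       // poll-style events (POLLIN, POLLHUP, ...)
}
#endif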

// TODO: getrlimit(RLIMIT_NOFILE, ...)
#define MAX_POLL_NFDS 1024

__EXPORT
int ppoll(struct pollfd* fds, nfds_t n,
          const struct timespec* timeout_ts, const sigset_t* sigmask) {
    if (sigmask) {
        return ERRNO(ENOSYS);
    }
    if (n > MAX_POLL_NFDS) {
        return ERRNO(EINVAL);
    }

    fdio_t* ios[n];
    int ios_used_max = -1;

    zx_status_t r = ZX_OK;
    nfds_t nvalid = 0;

    zx_wait_item_t items[n];

    for (nfds_t i = 0; i < n; i++) {
        struct pollfd* pfd = &fds[i];
        pfd->revents = 0; // initialize to zero

        ios[i] = NULL;
        if (pfd->fd < 0) {
            // if fd is negative, the entry is invalid
            continue;
        }
        fdio_t* io;
        if ((io = fd_to_io(pfd->fd)) == NULL) {
            // fd is not opened
            pfd->revents = POLLNVAL;
            continue;
        }
        ios[i] = io;
        ios_used_max = i;

        zx_handle_t h;
        zx_signals_t sigs;
        io->ops->wait_begin(io, pfd->events, &h, &sigs);
        if (h == ZX_HANDLE_INVALID) {
            // wait operation is not applicable to the handle
            r = ZX_ERR_INVALID_ARGS;
            break;
        }
        items[nvalid].handle = h;
        items[nvalid].waitfor = sigs;
        items[nvalid].pending = 0;
        nvalid++;
    }

    int nfds = 0;
    if (r == ZX_OK && nvalid > 0) {
        zx_time_t tmo = ZX_TIME_INFINITE;
        // Check for overflows on every operation.
        if (timeout_ts && timeout_ts->tv_sec >= 0 && timeout_ts->tv_nsec >= 0 &&
            timeout_ts->tv_sec <= INT64_MAX / ZX_SEC(1)) {
            zx_duration_t seconds_duration = ZX_SEC(timeout_ts->tv_sec);
            zx_duration_t duration =
                zx_duration_add_duration(seconds_duration, timeout_ts->tv_nsec);
            if (duration >= seconds_duration) {
                tmo = zx_deadline_after(duration);
            }
        }
        r = zx_object_wait_many(items, nvalid, tmo);
        // Pending signals may be reported in the ZX_ERR_TIMED_OUT case as well.
        if (r == ZX_OK || r == ZX_ERR_TIMED_OUT) {
            nfds_t j = 0; // j counts up on a valid entry

            for (nfds_t i = 0; i < n; i++) {
                struct pollfd* pfd = &fds[i];
                fdio_t* io = ios[i];

                if (io == NULL) {
                    // skip an invalid entry
                    continue;
                }
                if (j < nvalid) {
                    uint32_t events = 0;
                    io->ops->wait_end(io, items[j].pending, &events);
                    // mask unrequested events except HUP/ERR
                    pfd->revents = events & (pfd->events | POLLHUP | POLLERR);
                    if (pfd->revents != 0) {
                        nfds++;
                    }
                }
                j++;
            }
        }
    }

    for (int i = 0; i <= ios_used_max; i++) {
        if (ios[i]) {
            fdio_release(ios[i]);
        }
    }

    return (r == ZX_OK || r == ZX_ERR_TIMED_OUT) ? nfds : ERROR(r);
}

__EXPORT
int poll(struct pollfd* fds, nfds_t n, int timeout) {
    struct timespec timeout_ts = {timeout / 1000, (timeout % 1000) * 1000000};
    struct timespec* ts = timeout >= 0 ? &timeout_ts : NULL;
    return ppoll(fds, n, ts, NULL);
}
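
// Illustrative sketch (kept out of the build with #if 0, not part of fdio):
// waiting up to 500ms for an fd to become readable via the poll() wrapper
// above, which converts the millisecond timeout to a timespec and forwards to
// ppoll().  example_poll_readable is a hypothetical helper name.
#if 0
static int example_poll_readable(int fd) {
    struct pollfd pfd = {
        .fd = fd,
        .events = POLLIN,
    };
    int n = poll(&pfd, 1, 500);
    if (n < 0) {
        return -1;                       // error, errno set
    }
    if (n == 0) {
        return 0;                        // timed out
    }
    return (pfd.revents & POLLIN) != 0;  // 1 if readable
}
#endif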

__EXPORT
int select(int n, fd_set* restrict rfds, fd_set* restrict wfds, fd_set* restrict efds,
           struct timeval* restrict tv) {
    if (n > FD_SETSIZE || n < 1) {
        return ERRNO(EINVAL);
    }

    fdio_t* ios[n];
    int ios_used_max = -1;

    zx_status_t r = ZX_OK;
    int nvalid = 0;

    zx_wait_item_t items[n];

    for (int fd = 0; fd < n; fd++) {
        ios[fd] = NULL;

        uint32_t events = 0;
        if (rfds && FD_ISSET(fd, rfds))
            events |= POLLIN;
        if (wfds && FD_ISSET(fd, wfds))
            events |= POLLOUT;
        if (efds && FD_ISSET(fd, efds))
            events |= POLLERR;
        if (events == 0) {
            continue;
        }

        fdio_t* io;
        if ((io = fd_to_io(fd)) == NULL) {
            r = ZX_ERR_BAD_HANDLE;
            break;
        }
        ios[fd] = io;
        ios_used_max = fd;

        zx_handle_t h;
        zx_signals_t sigs;
        io->ops->wait_begin(io, events, &h, &sigs);
        if (h == ZX_HANDLE_INVALID) {
            r = ZX_ERR_INVALID_ARGS;
            break;
        }
        items[nvalid].handle = h;
        items[nvalid].waitfor = sigs;
        items[nvalid].pending = 0;
        nvalid++;
    }

    int nfds = 0;
    if (r == ZX_OK && nvalid > 0) {
        zx_time_t tmo = (tv == NULL) ? ZX_TIME_INFINITE :
            zx_deadline_after(zx_duration_add_duration(ZX_SEC(tv->tv_sec), ZX_USEC(tv->tv_usec)));
        r = zx_object_wait_many(items, nvalid, tmo);
        // Pending signals may be reported in the ZX_ERR_TIMED_OUT case as well.
        if (r == ZX_OK || r == ZX_ERR_TIMED_OUT) {
            int j = 0; // j counts up on a valid entry

            for (int fd = 0; fd < n; fd++) {
                fdio_t* io = ios[fd];
                if (io == NULL) {
                    // skip an invalid entry
                    continue;
                }
                if (j < nvalid) {
                    uint32_t events = 0;
                    io->ops->wait_end(io, items[j].pending, &events);
                    if (rfds && FD_ISSET(fd, rfds)) {
                        if (events & POLLIN) {
                            nfds++;
                        } else {
                            FD_CLR(fd, rfds);
                        }
                    }
                    if (wfds && FD_ISSET(fd, wfds)) {
                        if (events & POLLOUT) {
                            nfds++;
                        } else {
                            FD_CLR(fd, wfds);
                        }
                    }
                    if (efds && FD_ISSET(fd, efds)) {
                        if (events & POLLERR) {
                            nfds++;
                        } else {
                            FD_CLR(fd, efds);
                        }
                    }
                } else {
                    if (rfds) {
                        FD_CLR(fd, rfds);
                    }
                    if (wfds) {
                        FD_CLR(fd, wfds);
                    }
                    if (efds) {
                        FD_CLR(fd, efds);
                    }
                }
                j++;
            }
        }
    }

    for (int i = 0; i <= ios_used_max; i++) {
        if (ios[i]) {
            fdio_release(ios[i]);
        }
    }

    return (r == ZX_OK || r == ZX_ERR_TIMED_OUT) ? nfds : ERROR(r);
}

__EXPORT
int ioctl(int fd, int req, ...) {
    fdio_t* io;
    if ((io = fd_to_io(fd)) == NULL) {
        return ERRNO(EBADF);
    }
    va_list ap;
    va_start(ap, req);
    ssize_t r = io->ops->posix_ioctl(io, req, ap);
    va_end(ap);
    fdio_release(io);
    return STATUS(r);
}

__EXPORT
ssize_t sendto(int fd, const void* buf, size_t buflen, int flags, const struct sockaddr* addr, socklen_t addrlen) {
    fdio_t* io = fd_to_io(fd);
    if (io == NULL) {
        return ERRNO(EBADF);
    }
    ssize_t r = io->ops->sendto(io, buf, buflen, flags, addr, addrlen);
    fdio_release(io);
    return r < 0 ? STATUS(r) : r;
}

__EXPORT
ssize_t recvfrom(int fd, void* restrict buf, size_t buflen, int flags, struct sockaddr* restrict addr, socklen_t* restrict addrlen) {
    fdio_t* io = fd_to_io(fd);
    if (io == NULL) {
        return ERRNO(EBADF);
    }
    if (addr != NULL && addrlen == NULL) {
        fdio_release(io);
        return ERRNO(EFAULT);
    }
    ssize_t r = io->ops->recvfrom(io, buf, buflen, flags, addr, addrlen);
    fdio_release(io);
    return r < 0 ? STATUS(r) : r;
}

__EXPORT
ssize_t sendmsg(int fd, const struct msghdr *msg, int flags) {
    fdio_t* io = fd_to_io(fd);
    if (io == NULL) {
        return ERRNO(EBADF);
    }
    ssize_t r = io->ops->sendmsg(io, msg, flags);
    fdio_release(io);
    return r < 0 ? STATUS(r) : r;
}

__EXPORT
ssize_t recvmsg(int fd, struct msghdr* msg, int flags) {
    fdio_t* io = fd_to_io(fd);
    if (io == NULL) {
        return ERRNO(EBADF);
    }
    ssize_t r = io->ops->recvmsg(io, msg, flags);
    fdio_release(io);
    return r < 0 ? STATUS(r) : r;
}

__EXPORT
int shutdown(int fd, int how) {
    fdio_t* io;
    if ((io = fd_to_io(fd)) == NULL) {
        return ERRNO(EBADF);
    }
    zx_status_t r = io->ops->shutdown(io, how);
    fdio_release(io);
    if (r == ZX_ERR_BAD_STATE) {
        return ERRNO(ENOTCONN);
    }
    if (r == ZX_ERR_WRONG_TYPE) {
        return ERRNO(ENOTSOCK);
    }
    return STATUS(r);
}
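
// Illustrative sketch (kept out of the build with #if 0, not part of fdio):
// half-closing the write side of a connected socket fd; ENOTCONN and ENOTSOCK
// are surfaced from the status mapping above.  example_shutdown_write is a
// hypothetical helper name.
#if 0
static int example_shutdown_write(int sockfd) {
    // After this, no more data is sent, but the fd can still receive.
    if (shutdown(sockfd, SHUT_WR) != 0) {
        return -1;  // e.g. errno == ENOTCONN or ENOTSOCK
    }
    return 0;
}
#endif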

__EXPORT
int fstatfs(int fd, struct statfs* buf) {
    fdio_t* io;
    if ((io = fd_to_io(fd)) == NULL) {
        return ERRNO(EBADF);
    }
    zx_handle_t handle = __fdio_borrow_channel(io);
    if (handle == ZX_HANDLE_INVALID) {
        fdio_release(io);
        return ERRNO(ENOTSUP);
    }
    zx_status_t status;
    fuchsia_io_FilesystemInfo info;
    zx_status_t io_status = fuchsia_io_DirectoryAdminQueryFilesystem(handle, &status, &info);
    fdio_release(io);
    if (io_status != ZX_OK) {
        return ERRNO(fdio_status_to_errno(io_status));
    } else if (status != ZX_OK) {
        return ERRNO(fdio_status_to_errno(status));
    }

    info.name[fuchsia_io_MAX_FS_NAME_BUFFER - 1] = '\0';

    struct statfs stats = {};

    if (info.block_size) {
        stats.f_bsize = info.block_size;
        stats.f_blocks = info.total_bytes / stats.f_bsize;
        stats.f_bfree = stats.f_blocks - info.used_bytes / stats.f_bsize;
    }
    stats.f_bavail = stats.f_bfree;
    stats.f_files = info.total_nodes;
    stats.f_ffree = info.total_nodes - info.used_nodes;
    stats.f_namelen = info.max_filename_size;
    stats.f_type = info.fs_type;
    stats.f_fsid.__val[0] = info.fs_id;
    stats.f_fsid.__val[1] = info.fs_id >> 32;

    *buf = stats;
    return 0;
}

__EXPORT
int statfs(const char* path, struct statfs* buf) {
    int fd = open(path, O_RDONLY | O_CLOEXEC);
    if (fd < 0) {
        return fd;
    }
    int rv = fstatfs(fd, buf);
    close(fd);
    return rv;
}
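
// Illustrative sketch (kept out of the build with #if 0, not part of fdio):
// reading back the fields fstatfs() derives from fuchsia.io FilesystemInfo,
// e.g. to estimate free space from f_bsize and f_bavail.
// example_free_bytes is a hypothetical helper name.
#if 0
static uint64_t example_free_bytes(const char* path) {
    struct statfs st;
    if (statfs(path, &st) != 0) {
        return 0;
    }
    return (uint64_t)st.f_bsize * (uint64_t)st.f_bavail;
}
#endif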

__EXPORT
int _fd_open_max(void) {
    return FDIO_MAX_FD;
}