1// Copyright 2017 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <ctype.h>
6#include <errno.h>
7#include <fcntl.h>
8#include <limits.h>
9#include <stdarg.h>
10#include <stdio.h>
11#include <string.h>
12#include <threads.h>
13
14#include <ddk/driver.h>
15#include <driver-info/driver-info.h>
16#include <launchpad/launchpad.h>
17#include <zircon/assert.h>
18#include <zircon/processargs.h>
19#include <zircon/syscalls.h>
20#include <zircon/syscalls/policy.h>
21#include <zircon/syscalls/system.h>
22#include <zircon/device/dmctl.h>
23#include <zircon/boot/bootdata.h>
24#include <lib/fdio/io.h>
25#include <lib/zircon-internal/ktrace.h>
26
27#include "devcoordinator.h"
28#include "devhost.h"
29#include "devmgr.h"
30#include "log.h"
31#include "memfs-private.h"
32
33static void dc_driver_added(driver_t* drv, const char* version);
34static void dc_driver_added_init(driver_t* drv, const char* version);
35
36
37#define BOOT_FIRMWARE_DIR "/boot/lib/firmware"
38#define SYSTEM_FIRMWARE_DIR "/system/lib/firmware"
39
40extern zx_handle_t virtcon_open;
41
// Bitmask of LOG_* categories; errors and info messages are enabled by default.
uint32_t log_flags = LOG_ERROR | LOG_INFO;

// When true, get_devhost_bin() selects the ASan-capable devhost binary.
bool dc_asan_drivers = false;
// Set to true by dc_launch_devhost() after a devhost has been launched successfully.
bool dc_launched_first_devhost = false;

// Filled in by devmgr_set_bootdata() with a duplicate of the supplied VMO
// handle, but only while this handle is still invalid.
static zx_handle_t bootdata_vmo;
48
49static void dc_dump_state();
50static void dc_dump_devprops();
51static void dc_dump_drivers();
52
// Bookkeeping for a system suspend operation (used by dc_suspend() and
// dc_continue_suspend(), declared below).
typedef struct {
    zx_status_t status;
    uint32_t flags;     // non-zero means "in suspend" (see dc_in_suspend());
                        // presumably holds RUNNING or SUSPEND -- TODO confirm
#define RUNNING 0
#define SUSPEND 1
    uint32_t sflags;    // suspend flags
    uint32_t count;     // outstanding msgs
    devhost_t* dh;      // next devhost to process
    list_node_t devhosts;

    zx_handle_t socket; // socket to notify on for 'dm reboot' and 'dm poweroff'

    // mexec arguments
    zx_handle_t kernel;
    zx_handle_t bootdata;
} suspend_context_t;
// The single global suspend context: every field is zero-initialized and
// the devhosts list head is initialized to refer to itself (empty list).
static suspend_context_t suspend_ctx = []() {
    suspend_context_t suspend = {};
    suspend.devhosts = LIST_INITIAL_VALUE(suspend_ctx.devhosts);
    return suspend;
}();
74
// A variable-length metadata record that lives on an intrusive list via |node|.
// The payload is stored inline in the trailing flexible array |data|.
typedef struct {
    list_node_t node;
    uint32_t type;
    uint32_t length;
    bool has_path;      // zero terminated string starts at data[length]
    uint8_t data[];
} dc_metadata_t;

// Metadata records with has_path set are moved onto this list by
// dc_release_device() instead of being freed.
static list_node_t published_metadata = LIST_INITIAL_VALUE(published_metadata);
84
85static bool dc_in_suspend() {
86    return !!suspend_ctx.flags;
87}
88static void dc_suspend(uint32_t flags);
89static void dc_mexec(zx_handle_t* h);
90static void dc_continue_suspend(suspend_context_t* ctx);
91
92static bool suspend_fallback = false;
93static bool suspend_debug = false;
94
95static device_t root_device = []() {
96    device_t device = {};
97    device.flags = DEV_CTX_IMMORTAL | DEV_CTX_MUST_ISOLATE | DEV_CTX_MULTI_BIND;
98    device.protocol_id = ZX_PROTOCOL_ROOT;
99    device.name = "root";
100    device.libname = "";
101    device.args = "root,";
102    device.children = LIST_INITIAL_VALUE(root_device.children);
103    device.pending = LIST_INITIAL_VALUE(root_device.pending);
104    device.metadata = LIST_INITIAL_VALUE(root_device.metadata);
105    device.refcount = 1;
106    return device;
107}();
108
109static device_t misc_device = []() {
110    device_t device = {};
111    device.parent = &root_device;
112    device.flags = DEV_CTX_IMMORTAL | DEV_CTX_MUST_ISOLATE | DEV_CTX_MULTI_BIND;
113    device.protocol_id = ZX_PROTOCOL_MISC_PARENT;
114    device.name = "misc";
115    device.libname = "";
116    device.args = "misc,";
117    device.children = LIST_INITIAL_VALUE(misc_device.children);
118    device.pending = LIST_INITIAL_VALUE(misc_device.pending);
119    device.metadata = LIST_INITIAL_VALUE(misc_device.metadata);
120    device.refcount = 1;
121    return device;
122}();
123
124static device_t sys_device = []() {
125    device_t device = {};
126    device.parent = &root_device;
127    device.flags = DEV_CTX_IMMORTAL | DEV_CTX_MUST_ISOLATE;
128    device.name = "sys";
129    device.libname = "";
130    device.args = "sys,";
131    device.children = LIST_INITIAL_VALUE(sys_device.children);
132    device.pending = LIST_INITIAL_VALUE(sys_device.pending);
133    device.metadata = LIST_INITIAL_VALUE(sys_device.metadata);
134    device.refcount = 1;
135    return device;
136}();
137
138static device_t test_device = []() {
139    device_t device = {};
140    device.parent = &root_device;
141    device.flags = DEV_CTX_IMMORTAL | DEV_CTX_MUST_ISOLATE | DEV_CTX_MULTI_BIND;
142    device.protocol_id = ZX_PROTOCOL_TEST_PARENT;
143    device.name = "test";
144    device.libname = "";
145    device.args = "test,";
146    device.children = LIST_INITIAL_VALUE(test_device.children);
147    device.pending = LIST_INITIAL_VALUE(test_device.pending);
148    device.metadata = LIST_INITIAL_VALUE(test_device.metadata);
149    device.refcount = 1;
150    return device;
151}();
152
153
154static zx_handle_t dmctl_socket;
155
156static void dmprintf(const char* fmt, ...) {
157    if (dmctl_socket == ZX_HANDLE_INVALID) {
158        return;
159    }
160    char buf[1024];
161    va_list ap;
162    va_start(ap, fmt);
163    vsnprintf(buf, sizeof(buf), fmt, ap);
164    va_end(ap);
165    size_t actual;
166    if (zx_socket_write(dmctl_socket, 0, buf, strlen(buf), &actual) < 0) {
167        zx_handle_close(dmctl_socket);
168        dmctl_socket = ZX_HANDLE_INVALID;
169    }
170}
171
172static zx_status_t handle_dmctl_write(size_t len, const char* cmd) {
173    if (len == 4) {
174        if (!memcmp(cmd, "dump", 4)) {
175            dc_dump_state();
176            return ZX_OK;
177        }
178        if (!memcmp(cmd, "help", 4)) {
179            dmprintf("dump              - dump device tree\n"
180                     "poweroff          - power off the system\n"
181                     "shutdown          - power off the system\n"
182                     "suspend           - suspend the system to RAM\n"
183                     "reboot            - reboot the system\n"
184                     "reboot-bootloader - reboot the system into boatloader\n"
185                     "reboot-recovery   - reboot the system into recovery\n"
186                     "kerneldebug       - send a command to the kernel\n"
187                     "ktraceoff         - stop kernel tracing\n"
188                     "ktraceon          - start kernel tracing\n"
189                     "devprops          - dump published devices and their binding properties\n"
190                     "drivers           - list discovered drivers and their properties\n"
191                     );
192            return ZX_OK;
193        }
194    }
195    if ((len == 7) && !memcmp(cmd, "drivers", 7)) {
196        dc_dump_drivers();
197        return ZX_OK;
198    }
199    if (len == 8) {
200        if (!memcmp(cmd, "ktraceon", 8)) {
201            zx_ktrace_control(get_root_resource(), KTRACE_ACTION_START, KTRACE_GRP_ALL, nullptr);
202            return ZX_OK;
203        }
204        if (!memcmp(cmd, "devprops", 8)) {
205            dc_dump_devprops();
206            return ZX_OK;
207        }
208    }
209    if ((len == 9) && (!memcmp(cmd, "ktraceoff", 9))) {
210        zx_ktrace_control(get_root_resource(), KTRACE_ACTION_STOP, 0, nullptr);
211        zx_ktrace_control(get_root_resource(), KTRACE_ACTION_REWIND, 0, nullptr);
212        return ZX_OK;
213    }
214    if ((len > 12) && !memcmp(cmd, "kerneldebug ", 12)) {
215        return zx_debug_send_command(get_root_resource(), cmd + 12, len - 12);
216    }
217
218    if (dc_in_suspend()) {
219        log(ERROR, "devcoord: rpc: dm-command \"%.*s\" forbidden in suspend\n",
220            static_cast<uint32_t>(len), cmd);
221        return ZX_ERR_BAD_STATE;
222    }
223
224    if ((len == 6) && !memcmp(cmd, "reboot", 6)) {
225        devmgr_vfs_exit();
226        dc_suspend(DEVICE_SUSPEND_FLAG_REBOOT);
227        return ZX_OK;
228    }
229    if ((len == 17) && !memcmp(cmd, "reboot-bootloader", 17)) {
230        devmgr_vfs_exit();
231        dc_suspend(DEVICE_SUSPEND_FLAG_REBOOT_BOOTLOADER);
232        return ZX_OK;
233    }
234    if ((len == 15) && !memcmp(cmd, "reboot-recovery", 15)) {
235        devmgr_vfs_exit();
236        dc_suspend(DEVICE_SUSPEND_FLAG_REBOOT_RECOVERY);
237        return ZX_OK;
238    }
239    if ((len == 7) && !memcmp(cmd, "suspend", 7)) {
240        dc_suspend(DEVICE_SUSPEND_FLAG_SUSPEND_RAM);
241        return ZX_OK;
242    }
243    if (len == 8 && (!memcmp(cmd, "poweroff", 8) || !memcmp(cmd, "shutdown", 8))) {
244        devmgr_vfs_exit();
245        dc_suspend(DEVICE_SUSPEND_FLAG_POWEROFF);
246        return ZX_OK;
247    }
248    if ((len > 11) && !memcmp(cmd, "add-driver:", 11)) {
249        len -= 11;
250        char path[len + 1];
251        memcpy(path, cmd + 11, len);
252        path[len] = 0;
253        load_driver(path, dc_driver_added);
254        return ZX_OK;
255    }
256    dmprintf("unknown command\n");
257    log(ERROR, "dmctl: unknown command '%.*s'\n", (int) len, cmd);
258    return ZX_ERR_NOT_SUPPORTED;
259}
260
261static zx_status_t dc_handle_device(port_handler_t* ph, zx_signals_t signals, uint32_t evt);
262static zx_status_t dc_attempt_bind(driver_t* drv, device_t* dev);
263
static bool dc_running;

// Channel that dc_notify() writes device events to; installed by dc_watch()
// and reset to ZX_HANDLE_INVALID when a write to it fails.
static zx_handle_t dc_watch_channel;

// Job handed to launchpad_create_with_jobs() when launching devhosts.
static zx_handle_t devhost_job;
// Port on which device RPC channels are waited on (see port_wait() in
// dc_add_device()).
port_t dc_port;

// All Drivers
static list_node_t list_drivers = LIST_INITIAL_VALUE(list_drivers);

// Drivers to add to All Drivers
static list_node_t list_drivers_new = LIST_INITIAL_VALUE(list_drivers_new);

// Drivers to try last
static list_node_t list_drivers_fallback = LIST_INITIAL_VALUE(list_drivers_fallback);

// All Devices (excluding static immortal devices)
static list_node_t list_devices = LIST_INITIAL_VALUE(list_devices);

// All DevHosts
static list_node_t list_devhosts = LIST_INITIAL_VALUE(list_devhosts);
285
286static driver_t* libname_to_driver(const char* libname) {
287    driver_t* drv;
288    list_for_every_entry(&list_drivers, drv, driver_t, node) {
289        if (!strcmp(libname, drv->libname)) {
290            return drv;
291        }
292    }
293    return nullptr;
294}
295
296static zx_status_t load_vmo(const char* libname, zx_handle_t* out) {
297    int fd = open(libname, O_RDONLY);
298    if (fd < 0) {
299        log(ERROR, "devcoord: cannot open driver '%s'\n", libname);
300        return ZX_ERR_IO;
301    }
302    zx_status_t r = fdio_get_vmo_clone(fd, out);
303    close(fd);
304    if (r < 0) {
305        log(ERROR, "devcoord: cannot get driver vmo '%s'\n", libname);
306    }
307    const char* vmo_name = strrchr(libname, '/');
308    if (vmo_name != nullptr) {
309        ++vmo_name;
310    } else {
311        vmo_name = libname;
312    }
313    zx_object_set_property(*out, ZX_PROP_NAME, vmo_name, strlen(vmo_name));
314    return r;
315}
316
317static zx_status_t libname_to_vmo(const char* libname, zx_handle_t* out) {
318    driver_t* drv = libname_to_driver(libname);
319    if (drv == nullptr) {
320        log(ERROR, "devcoord: cannot find driver '%s'\n", libname);
321        return ZX_ERR_NOT_FOUND;
322    }
323
324    // Check for cached DSO
325    if (drv->dso_vmo != ZX_HANDLE_INVALID) {
326        zx_status_t r = zx_handle_duplicate(drv->dso_vmo,
327                                            ZX_RIGHTS_BASIC | ZX_RIGHTS_PROPERTY |
328                                            ZX_RIGHT_READ | ZX_RIGHT_EXECUTE | ZX_RIGHT_MAP,
329                                            out);
330        if (r != ZX_OK) {
331            log(ERROR, "devcoord: cannot duplicate cached dso for '%s' '%s'\n", drv->name, libname);
332        }
333        return r;
334    } else {
335        return load_vmo(libname, out);
336    }
337}
338
339void devmgr_set_bootdata(zx_handle_t vmo) {
340    if (bootdata_vmo == ZX_HANDLE_INVALID) {
341        zx_handle_duplicate(vmo, ZX_RIGHT_SAME_RIGHTS, &bootdata_vmo);
342    }
343}
344
345static void dc_dump_device(device_t* dev, size_t indent) {
346    zx_koid_t pid = dev->host ? dev->host->koid : 0;
347    char extra[256];
348    if (log_flags & LOG_DEVLC) {
349        snprintf(extra, sizeof(extra), " dev=%p ref=%d", dev, dev->refcount);
350    } else {
351        extra[0] = 0;
352    }
353    if (pid == 0) {
354        dmprintf("%*s[%s]%s\n", (int) (indent * 3), "", dev->name, extra);
355    } else {
356        dmprintf("%*s%c%s%c pid=%zu%s %s\n",
357                 (int) (indent * 3), "",
358                 dev->flags & DEV_CTX_PROXY ? '<' : '[',
359                 dev->name,
360                 dev->flags & DEV_CTX_PROXY ? '>' : ']',
361                 pid, extra,
362                 dev->libname ? dev->libname : "");
363    }
364    device_t* child;
365    if (dev->proxy) {
366        indent++;
367        dc_dump_device(dev->proxy, indent);
368    }
369    list_for_every_entry(&dev->children, child, device_t, node) {
370        dc_dump_device(child, indent + 1);
371    }
372}
373
374static void dc_dump_state() {
375    dc_dump_device(&root_device, 0);
376    dc_dump_device(&misc_device, 1);
377    dc_dump_device(&sys_device, 1);
378    dc_dump_device(&test_device, 1);
379}
380
381static void dc_dump_device_props(device_t* dev) {
382    if (dev->host) {
383        dmprintf("Name [%s]%s%s%s\n",
384                 dev->name,
385                 dev->libname ? " Driver [" : "",
386                 dev->libname ? dev->libname : "",
387                 dev->libname ? "]" : "");
388        dmprintf("Flags   :%s%s%s%s%s%s%s\n",
389                 dev->flags & DEV_CTX_IMMORTAL     ? " Immortal"  : "",
390                 dev->flags & DEV_CTX_MUST_ISOLATE ? " Isolate"   : "",
391                 dev->flags & DEV_CTX_MULTI_BIND   ? " MultiBind" : "",
392                 dev->flags & DEV_CTX_BOUND        ? " Bound"     : "",
393                 dev->flags & DEV_CTX_DEAD         ? " Dead"      : "",
394                 dev->flags & DEV_CTX_ZOMBIE       ? " Zombie"    : "",
395                 dev->flags & DEV_CTX_PROXY        ? " Proxy"     : "");
396
397        char a = (char)((dev->protocol_id >> 24) & 0xFF);
398        char b = (char)((dev->protocol_id >> 16) & 0xFF);
399        char c = (char)((dev->protocol_id >> 8) & 0xFF);
400        char d = (char)(dev->protocol_id & 0xFF);
401        dmprintf("ProtoId : '%c%c%c%c' 0x%08x(%u)\n",
402                 isprint(a) ? a : '.',
403                 isprint(b) ? b : '.',
404                 isprint(c) ? c : '.',
405                 isprint(d) ? d : '.',
406                 dev->protocol_id,
407                 dev->protocol_id);
408
409        dmprintf("%u Propert%s\n", dev->prop_count, dev->prop_count == 1 ? "y" : "ies");
410        for (uint32_t i = 0; i < dev->prop_count; ++i) {
411            const zx_device_prop_t* p = dev->props + i;
412            const char* param_name = di_bind_param_name(p->id);
413
414            if (param_name) {
415                dmprintf("[%2u/%2u] : Value 0x%08x Id %s\n",
416                         i, dev->prop_count, p->value, param_name);
417            } else {
418                dmprintf("[%2u/%2u] : Value 0x%08x Id 0x%04hx\n",
419                         i, dev->prop_count, p->value, p->id);
420            }
421        }
422        dmprintf("\n");
423    }
424
425    device_t* child;
426    if (dev->proxy) {
427        dc_dump_device_props(dev->proxy);
428    }
429    list_for_every_entry(&dev->children, child, device_t, node) {
430        dc_dump_device_props(child);
431    }
432}
433
434static void dc_dump_devprops() {
435    dc_dump_device_props(&root_device);
436    dc_dump_device_props(&misc_device);
437    dc_dump_device_props(&sys_device);
438    dc_dump_device_props(&test_device);
439}
440
441static void dc_dump_drivers() {
442    driver_t* drv;
443    bool first = true;
444    list_for_every_entry(&list_drivers, drv, driver_t, node) {
445        dmprintf("%sName    : %s\n", first ? "" : "\n", drv->name);
446        dmprintf("Driver  : %s\n", drv->libname ? drv->libname : "(null)");
447        dmprintf("Flags   : 0x%08x\n", drv->flags);
448        if (drv->binding_size) {
449            char line[256];
450            uint32_t count = drv->binding_size / static_cast<uint32_t>(sizeof(drv->binding[0]));
451            dmprintf("Binding : %u instruction%s (%u bytes)\n",
452                     count, (count == 1) ? "" : "s", drv->binding_size);
453            for (uint32_t i = 0; i < count; ++i) {
454                di_dump_bind_inst(drv->binding + i, line, sizeof(line));
455                dmprintf("[%u/%u]: %s\n", i + 1, count, line);
456            }
457        }
458        first = false;
459    }
460}
461
462static void dc_handle_new_device(device_t* dev);
463static void dc_handle_new_driver();
464
// Work items queued by queue_work() and removed again by cancel_work().
static list_node_t list_pending_work = LIST_INITIAL_VALUE(list_pending_work);
static list_node_t list_unbound_devices = LIST_INITIAL_VALUE(list_unbound_devices);
467
468static void queue_work(work_t* work, dc_work::Op op, uint32_t arg) {
469    ZX_ASSERT(work->op == dc_work::Op::kIdle);
470    work->op = op;
471    work->arg = arg;
472    list_add_tail(&list_pending_work, &work->node);
473}
474
475static void cancel_work(work_t* work) {
476    if (work->op != dc_work::Op::kIdle) {
477        list_delete(&work->node);
478        work->op = dc_work::Op::kIdle;
479    }
480}
481
482static void process_work(work_t* work) {
483    dc_work::Op op = work->op;
484    work->op = dc_work::Op::kIdle;
485
486    switch (op) {
487    case dc_work::Op::kDeviceAdded: {
488        device_t* dev = containerof(work, device_t, work);
489        dc_handle_new_device(dev);
490        break;
491    }
492    case dc_work::Op::kDriverAdded: {
493        dc_handle_new_driver();
494        break;
495    }
496    default:
497        log(ERROR, "devcoord: unknown work: op=%u\n", static_cast<uint32_t>(op));
498    }
499}
500
501static const char* get_devhost_bin() {
502    // If there are any ASan drivers, use the ASan-supporting devhost for
503    // all drivers because even a devhost launched initially with just a
504    // non-ASan driver might later load an ASan driver.  One day we might
505    // be able to be more flexible about which drivers must get loaded into
506    // the same devhost and thus be able to use both ASan and non-ASan
507    // devhosts at the same time when only a subset of drivers use ASan.
508    if (dc_asan_drivers)
509        return "/boot/bin/devhost.asan";
510    return "/boot/bin/devhost";
511}
512
513zx_handle_t get_service_root();
514
515static zx_status_t dc_get_topo_path(device_t* dev, char* out, size_t max) {
516    char tmp[max];
517    char* path = tmp + max - 1;
518    *path = 0;
519    size_t total = 1;
520
521    while (dev != nullptr) {
522        if (dev->flags & DEV_CTX_PROXY) {
523            dev = dev->parent;
524        }
525        const char* name;
526
527        if (dev->parent) {
528            name = dev->name;
529        } else if (!strcmp(misc_device.name, dev->name)) {
530            name = "dev/misc";
531        } else if (!strcmp(sys_device.name, dev->name)) {
532            name = "dev/sys";
533        } else if (!strcmp(sys_device.name, dev->name)) {
534            name = "dev/test";
535        } else {
536            name = "dev";
537        }
538        size_t len = strlen(name) + 1;
539        if (len > (max - total)) {
540            return ZX_ERR_BUFFER_TOO_SMALL;
541        }
542        memcpy(path - len + 1, name, len - 1);
543        path -= len;
544        *path = '/';
545        total += len;
546        dev = dev->parent;
547    }
548
549    memcpy(out, path, total);
550    return ZX_OK;
551}
552
553//TODO: use a better device identifier
554static zx_status_t dc_notify(device_t* dev, uint32_t op) {
555    if (dc_watch_channel == ZX_HANDLE_INVALID) {
556        return ZX_ERR_BAD_STATE;
557    }
558    zx_status_t r;
559    if (op == DEVMGR_OP_DEVICE_ADDED) {
560        size_t propslen = sizeof(zx_device_prop_t) * dev->prop_count;
561        size_t len = sizeof(devmgr_event_t) + propslen;
562        char msg[len + DC_PATH_MAX];
563        auto evt = reinterpret_cast<devmgr_event_t*>(msg);
564        memset(evt, 0, sizeof(devmgr_event_t));
565        memcpy(msg + sizeof(devmgr_event_t), dev->props, propslen);
566        if (dc_get_topo_path(dev, msg + len, DC_PATH_MAX) < 0) {
567            return ZX_OK;
568        }
569        size_t pathlen = strlen(msg + len);
570        len += pathlen;
571        evt->opcode = op;
572        if (dev->flags & DEV_CTX_BOUND) {
573            evt->flags |= DEVMGR_FLAGS_BOUND;
574        }
575        evt->id = (uintptr_t) dev;
576        evt->u.add.protocol_id = dev->protocol_id;
577        evt->u.add.props_len = static_cast<uint32_t>(propslen);
578        evt->u.add.path_len = static_cast<uint32_t>(pathlen);
579        r = zx_channel_write(dc_watch_channel, 0, msg, static_cast<uint32_t>(len), nullptr, 0);
580    } else {
581        devmgr_event_t evt;
582        memset(&evt, 0, sizeof(evt));
583        evt.opcode = op;
584        if (dev->flags & DEV_CTX_BOUND) {
585            evt.flags |= DEVMGR_FLAGS_BOUND;
586        }
587        evt.id = (uintptr_t) dev;
588        r = zx_channel_write(dc_watch_channel, 0, &evt, sizeof(evt), nullptr, 0);
589    }
590    if (r < 0) {
591        zx_handle_close(dc_watch_channel);
592        dc_watch_channel = ZX_HANDLE_INVALID;
593    }
594    return r;
595}
596
597static void dc_watch(zx_handle_t h) {
598    if (dc_watch_channel != ZX_HANDLE_INVALID) {
599        zx_handle_close(dc_watch_channel);
600    }
601    dc_watch_channel = h;
602    device_t* dev;
603    list_for_every_entry(&list_devices, dev, device_t, anode) {
604        if (dev->flags & (DEV_CTX_DEAD | DEV_CTX_ZOMBIE)) {
605            // if device is dead, ignore it
606            continue;
607        }
608        if (dc_notify(dev, DEVMGR_OP_DEVICE_ADDED) < 0) {
609            break;
610        }
611    }
612}
613
// Launches a devhost process named |name| under devhost_job.
//
// host: on success host->proc receives the process handle from
//       launchpad_go(), and host->koid is filled in when the handle's basic
//       info can be queried.
// name: process name handed to launchpad.
// hrpc: channel handle passed to the new process as PA_HND(PA_USER0, 0).
//
// Returns ZX_OK on success, or the failing status of launchpad_go().
// NOTE(review): the results of the intermediate launchpad_* calls are not
// checked here; presumably launchpad reports them from launchpad_go() --
// confirm.
static zx_status_t dc_launch_devhost(devhost_t* host,
                                     const char* name, zx_handle_t hrpc) {
    const char* devhost_bin = get_devhost_bin();

    launchpad_t* lp;
    launchpad_create_with_jobs(devhost_job, 0, name, &lp);
    launchpad_load_from_file(lp, devhost_bin);
    // The binary path doubles as the only command line argument.
    launchpad_set_args(lp, 1, &devhost_bin);

    launchpad_add_handle(lp, hrpc, PA_HND(PA_USER0, 0));

    zx_handle_t h;
    //TODO: limit root resource to root devhost only
    zx_handle_duplicate(get_root_resource(), ZX_RIGHT_SAME_RIGHTS, &h);
    launchpad_add_handle(lp, h, PA_HND(PA_RESOURCE, 0));

    // Inherit devmgr's environment (including kernel cmdline)
    launchpad_clone(lp, LP_CLONE_ENVIRON);

    // Namespace entries for the devhost. name_count counts the directory
    // handles actually added, so "/svc" is only announced when its clone
    // succeeded.
    const char* nametable[2] = { "/boot", "/svc", };
    uint32_t name_count = 0;

    //TODO: eventually devhosts should not have vfs access
    launchpad_add_handle(lp, fs_clone("boot").release(),
                         PA_HND(PA_NS_DIR, name_count++));

    //TODO: constrain to /svc/device
    if ((h = fs_clone("svc").release()) != ZX_HANDLE_INVALID) {
        launchpad_add_handle(lp, h, PA_HND(PA_NS_DIR, name_count++));
    }

    launchpad_set_nametable(lp, name_count, nametable);

    //TODO: limit root job access to root devhost only
    launchpad_add_handle(lp, get_sysinfo_job_root(),
                         PA_HND(PA_USER0, ID_HJOBROOT));

    const char* errmsg;
    zx_status_t status = launchpad_go(lp, &host->proc, &errmsg);
    if (status < 0) {
        log(ERROR, "devcoord: launch devhost '%s': failed: %d: %s\n",
            name, status, errmsg);
        return status;
    }
    // Record the koid of the new process; it is left unchanged when the
    // query fails.
    zx_info_handle_basic_t info;
    if (zx_object_get_info(host->proc, ZX_INFO_HANDLE_BASIC, &info,
                           sizeof(info), nullptr, nullptr) == ZX_OK) {
        host->koid = info.koid;
    }
    log(INFO, "devcoord: launch devhost '%s': pid=%zu\n",
        name, host->koid);

    dc_launched_first_devhost = true;

    return ZX_OK;
}
670
671static zx_status_t dc_new_devhost(const char* name, devhost_t* parent,
672                                  devhost_t** out) {
673    devhost_t* dh = static_cast<devhost_t*>(calloc(1, sizeof(devhost_t)));
674    if (dh == nullptr) {
675        return ZX_ERR_NO_MEMORY;
676    }
677    new (dh) devhost_t;
678
679    zx_handle_t hrpc;
680    zx_status_t r;
681    if ((r = zx_channel_create(0, &hrpc, &dh->hrpc)) < 0) {
682        free(dh);
683        return r;
684    }
685
686    if ((r = dc_launch_devhost(dh, name, hrpc)) < 0) {
687        zx_handle_close(dh->hrpc);
688        free(dh);
689        return r;
690    }
691
692    list_initialize(&dh->devices);
693    list_initialize(&dh->children);
694
695    if (parent) {
696        dh->parent = parent;
697        dh->parent->refcount++;
698        list_add_tail(&dh->parent->children, &dh->node);
699    }
700    list_add_tail(&list_devhosts, &dh->anode);
701
702    log(DEVLC, "devcoord: new host %p\n", dh);
703
704    *out = dh;
705    return ZX_OK;
706}
707
708static void dc_release_devhost(devhost_t* dh) {
709    dh->refcount--;
710    if (dh->refcount > 0) {
711        return;
712    }
713    log(INFO, "devcoord: destroy host %p\n", dh);
714    devhost_t* parent = dh->parent;
715    if (parent != nullptr) {
716        dh->parent = nullptr;
717        list_delete(&dh->node);
718        dc_release_devhost(parent);
719    }
720    list_delete(&dh->anode);
721    zx_handle_close(dh->hrpc);
722    zx_task_kill(dh->proc);
723    zx_handle_close(dh->proc);
724    free(dh);
725}
726
// called when device children or proxies are removed
728static void dc_release_device(device_t* dev) {
729    log(DEVLC, "devcoord: release dev %p name='%s' ref=%d\n", dev, dev->name, dev->refcount);
730
731    dev->refcount--;
732    if (dev->refcount > 0) {
733        return;
734    }
735
736    // Immortal devices are never destroyed
737    if (dev->flags & DEV_CTX_IMMORTAL) {
738        return;
739    }
740
741    log(DEVLC, "devcoord: destroy dev %p name='%s'\n", dev, dev->name);
742
743    devfs_unpublish(dev);
744
745    if (dev->hrpc != ZX_HANDLE_INVALID) {
746        zx_handle_close(dev->hrpc);
747        dev->hrpc = ZX_HANDLE_INVALID;
748        dev->ph.handle = ZX_HANDLE_INVALID;
749    }
750    dev->host = nullptr;
751
752    cancel_work(&dev->work);
753
754    dc_metadata_t* md;
755    while ((md = list_remove_head_type(&dev->metadata, dc_metadata_t, node)) != nullptr) {
756        if (md->has_path) {
757            // return to published_metadata list
758            list_add_tail(&published_metadata, &md->node);
759        } else {
760            // metadata was attached directly to this device, so we free it here
761            free(md);
762        }
763    }
764
765    //TODO: cancel any pending rpc responses
766    free(dev);
767}
768
// Add a new device to a parent device (same devhost)
// New device is published in devfs.
// Caller closes handles on error, so we don't have to.
//
// parent:    device the new device is added under; when it is a proxy, the
//            proxied device (its parent) is used for devfs and the tree.
// hrpc:      RPC channel for the new device; waited on through dc_port.
// msg:       supplies datalen (byte size of the props, must be a multiple of
//            sizeof(zx_device_prop_t)), argslen, namelen and protocol_id.
// name:      either "name" or "libname,name"; namelen + 1 bytes are copied.
// args:      bus arguments; argslen + 1 bytes are copied. A non-empty value
//            marks the device as must-isolate.
// data:      the device properties, datalen bytes.
// invisible: when true the device is created with DEV_CTX_INVISIBLE and
//            neither a notification nor device-added work is issued.
//
// Returns ZX_OK on success, ZX_ERR_INVALID_ARGS for a malformed props size
// or an over-long name, ZX_ERR_NO_MEMORY when allocation fails, or the
// failing status of devfs_publish() / port_wait().
static zx_status_t dc_add_device(device_t* parent, zx_handle_t hrpc,
                                 dc_msg_t* msg, const char* name,
                                 const char* args, const void* data,
                                 bool invisible) {
    if (msg->datalen % sizeof(zx_device_prop_t)) {
        return ZX_ERR_INVALID_ARGS;
    }
    device_t* dev;
    // allocate device struct, followed by space for props, followed
    // by space for bus arguments, followed by space for the name
    // (the + 2 accounts for the two string terminators)
    size_t sz = sizeof(*dev) + msg->datalen + msg->argslen + msg->namelen + 2;
    if ((dev = static_cast<device_t*>(calloc(1, sz))) == nullptr) {
        return ZX_ERR_NO_MEMORY;
    }
    new (dev) device_t;
    list_initialize(&dev->children);
    list_initialize(&dev->pending);
    list_initialize(&dev->metadata);
    dev->hrpc = hrpc;
    dev->prop_count = static_cast<uint32_t>(msg->datalen / sizeof(zx_device_prop_t));
    dev->protocol_id = msg->protocol_id;

    // The strings are stored directly behind the props array.
    char* text = (char*) (dev->props + dev->prop_count);
    memcpy(text, args, msg->argslen + 1);
    dev->args = text;

    text += msg->argslen + 1;
    memcpy(text, name, msg->namelen + 1);

    // Split "libname,name" in place at the first comma; without a comma the
    // whole string is the name and the libname is empty.
    char* text2 = strchr(text, ',');
    if (text2 != nullptr) {
        *text2++ = 0;
        dev->name = text2;
        dev->libname = text;
    } else {
        dev->name = text;
        dev->libname = "";
    }

    memcpy(dev->props, data, msg->datalen);

    if (strlen(dev->name) > ZX_DEVICE_NAME_MAX) {
        free(dev);
        return ZX_ERR_INVALID_ARGS;
    }

    // If we have bus device args we are,
    // by definition, a bus device.
    if (args[0]) {
        dev->flags |= DEV_CTX_MUST_ISOLATE;
    }

    // We exist within our parent's device host
    dev->host = parent->host;

    // If our parent is a proxy, for the purpose
    // of devicefs, we need to work with *its* parent
    // which is the device that it is proxying.
    if (parent->flags & DEV_CTX_PROXY) {
        parent = parent->parent;
    }
    dev->parent = parent;

    // We must mark the device as invisible before publishing so
    // that we don't send "device added" notifications.
    if (invisible) {
        dev->flags |= DEV_CTX_INVISIBLE;
    }

    zx_status_t r;
    if ((r = devfs_publish(parent, dev)) < 0) {
        free(dev);
        return r;
    }

    // Start waiting for messages (or peer closure) on the device channel.
    dev->ph.handle = hrpc;
    dev->ph.waitfor = ZX_CHANNEL_READABLE | ZX_CHANNEL_PEER_CLOSED;
    dev->ph.func = dc_handle_device;
    if ((r = port_wait(&dc_port, &dev->ph)) < 0) {
        devfs_unpublish(dev);
        free(dev);
        return r;
    }

    // From here on nothing can fail: link the device into its devhost, its
    // parent and the global device list, taking the matching references.
    if (dev->host) {
        //TODO host == nullptr should be impossible
        dev->host->refcount++;
        list_add_tail(&dev->host->devices, &dev->dhnode);
    }
    dev->refcount = 1;
    list_add_tail(&parent->children, &dev->node);
    parent->refcount++;

    list_add_tail(&list_devices, &dev->anode);

    log(DEVLC, "devcoord: dev %p name='%s' ++ref=%d (child)\n",
        parent, parent->name, parent->refcount);

    log(DEVLC, "devcoord: publish %p '%s' props=%u args='%s' parent=%p\n",
        dev, dev->name, dev->prop_count, dev->args, dev->parent);

    // Invisible devices are announced later by dc_make_visible().
    if (!invisible) {
        dc_notify(dev, DEVMGR_OP_DEVICE_ADDED);
        queue_work(&dev->work, dc_work::Op::kDeviceAdded, 0);
    }
    return ZX_OK;
}
879
880static zx_status_t dc_make_visible(device_t* dev) {
881    if (dev->flags & DEV_CTX_DEAD) {
882        return ZX_ERR_BAD_STATE;
883    }
884    if (dev->flags & DEV_CTX_INVISIBLE) {
885        dev->flags &= ~DEV_CTX_INVISIBLE;
886        devfs_advertise(dev);
887        dc_notify(dev, DEVMGR_OP_DEVICE_ADDED);
888        queue_work(&dev->work, dc_work::Op::kDeviceAdded, 0);
889    }
890    return ZX_OK;
891}
892
893// Remove device from parent
894// forced indicates this is removal due to a channel close
895// or process exit, which means we should remove all other
896// devices that share the devhost at the same time
897static zx_status_t dc_remove_device(device_t* dev, bool forced) {
898    if (dev->flags & DEV_CTX_ZOMBIE) {
899        // This device was removed due to its devhost dying
900        // (process exit or some other channel on that devhost
901        // closing), and is now receiving the final remove call
902        dev->flags &= (~DEV_CTX_ZOMBIE);
903        dc_release_device(dev);
904        return ZX_OK;
905    }
906    if (dev->flags & DEV_CTX_DEAD) {
907        // This should not happen
908        log(ERROR, "devcoord: cannot remove dev %p name='%s' twice!\n", dev, dev->name);
909        return ZX_ERR_BAD_STATE;
910    }
911    if (dev->flags & DEV_CTX_IMMORTAL) {
912        // This too should not happen
913        log(ERROR, "devcoord: cannot remove dev %p name='%s' (immortal)\n", dev, dev->name);
914        return ZX_ERR_BAD_STATE;
915    }
916
917    log(DEVLC, "devcoord: remove %p name='%s' parent=%p\n", dev, dev->name, dev->parent);
918    dev->flags |= DEV_CTX_DEAD;
919
920    // remove from devfs, preventing further OPEN attempts
921    devfs_unpublish(dev);
922
923    if (dev->proxy) {
924        dc_msg_t msg;
925        uint32_t mlen;
926        zx_status_t r;
927        if ((r = dc_msg_pack(&msg, &mlen, nullptr, 0, nullptr, nullptr)) < 0) {
928            log(ERROR, "devcoord: dc_msg_pack failed in dc_remove_device\n");
929        } else {
930            msg.txid = 0;
931            msg.op = dc_msg_t::Op::kRemoveDevice;
932            if ((r = zx_channel_write(dev->proxy->hrpc, 0, &msg, mlen, nullptr, 0)) != ZX_OK) {
933            log(ERROR, "devcoord: zx_channel_write failed in dc_remove_devicey\n");
934            }
935        }
936    }
937
938    // detach from devhost
939    devhost_t* dh = dev->host;
940    if (dh != nullptr) {
941        dev->host = nullptr;
942        list_delete(&dev->dhnode);
943
944        // If we are responding to a disconnect,
945        // we'll remove all the other devices on this devhost too.
946        // A side-effect of this is that the devhost will be released,
947        // as well as any proxy devices.
948        if (forced) {
949            dh->flags |= DEV_HOST_DYING;
950
951            device_t* next;
952            device_t* last = nullptr;
953            while ((next = list_peek_head_type(&dh->devices, device_t, dhnode)) != nullptr) {
954                if (last == next) {
955                    // This shouldn't be possbile, but let's not infinite-loop if it happens
956                    log(ERROR, "devcoord: fatal: failed to remove dev %p from devhost\n", next);
957                    exit(1);
958                }
959                dc_remove_device(next, false);
960                last = next;
961            }
962
963            //TODO: set a timer so if this devhost does not finish dying
964            //      in a reasonable amount of time, we fix the glitch.
965        }
966
967        dc_release_devhost(dh);
968    }
969
970    // if we have a parent, disconnect and downref it
971    device_t* parent = dev->parent;
972    if (parent != nullptr) {
973        dev->parent = nullptr;
974        if (dev->flags & DEV_CTX_PROXY) {
975            parent->proxy = nullptr;
976        } else {
977            list_delete(&dev->node);
978            if (list_is_empty(&parent->children)) {
979                parent->flags &= (~DEV_CTX_BOUND);
980
981                //TODO: This code is to cause the bind process to
982                //      restart and get a new devhost to be launched
983                //      when a devhost dies.  It should probably be
984                //      more tied to devhost teardown than it is.
985
986                // IF we are the last child of our parent
987                // AND our parent is not itself dead
988                // AND our parent is a BUSDEV
989                // AND our parent's devhost is not dying
990                // THEN we will want to rebind our parent
991                if (!(parent->flags & DEV_CTX_DEAD) &&
992                    (parent->flags & DEV_CTX_MUST_ISOLATE) &&
993                    ((parent->host == nullptr) || !(parent->host->flags & DEV_HOST_DYING))) {
994
995                    log(DEVLC, "devcoord: bus device %p name='%s' is unbound\n",
996                        parent, parent->name);
997
998                    //TODO: introduce timeout, exponential backoff
999                    queue_work(&parent->work, dc_work::Op::kDeviceAdded, 0);
1000                }
1001            }
1002        }
1003        dc_release_device(parent);
1004    }
1005
1006    if (!(dev->flags & DEV_CTX_PROXY)) {
1007        // remove from list of all devices
1008        list_delete(&dev->anode);
1009        dc_notify(dev, DEVMGR_OP_DEVICE_REMOVED);
1010    }
1011
1012    if (forced) {
1013        // release the ref held by the devhost
1014        dc_release_device(dev);
1015    } else {
1016        // Mark the device as a zombie but don't drop the
1017        // (likely) final reference.  The caller needs to
1018        // finish replying to the RPC and dropping the
1019        // reference would close the RPC channel.
1020        dev->flags |= DEV_CTX_ZOMBIE;
1021    }
1022    return ZX_OK;
1023}
1024
1025static zx_status_t dc_bind_device(device_t* dev, const char* drvlibname) {
1026     log(INFO, "devcoord: dc_bind_device() '%s'\n", drvlibname);
1027
1028    // shouldn't be possible to get a bind request for a proxy device
1029    if (dev->flags & DEV_CTX_PROXY) {
1030        return ZX_ERR_NOT_SUPPORTED;
1031    }
1032
1033    // A libname of "" means a general rebind request
1034    // instead of a specific request
1035    bool autobind = (drvlibname[0] == 0);
1036
1037    //TODO: disallow if we're in the middle of enumeration, etc
1038    driver_t* drv;
1039    list_for_every_entry(&list_drivers, drv, driver_t, node) {
1040        if (autobind || !strcmp(drv->libname, drvlibname)) {
1041            if (dc_is_bindable(drv, dev->protocol_id,
1042                               dev->props, dev->prop_count, autobind)) {
1043                log(SPEW, "devcoord: drv='%s' bindable to dev='%s'\n",
1044                    drv->name, dev->name);
1045                dc_attempt_bind(drv, dev);
1046                return ZX_OK;
1047            }
1048        }
1049    }
1050
1051    // Notify observers that this device is available again
1052    // Needed for non-auto-binding drivers like GPT against block, etc
1053    if (autobind) {
1054        devfs_advertise_modified(dev);
1055    }
1056
1057    return ZX_OK;
1058};
1059
1060static zx_status_t dc_load_firmware(device_t* dev, const char* path,
1061                                    zx_handle_t* vmo, size_t* size) {
1062    static const char* fwdirs[] = {
1063        BOOT_FIRMWARE_DIR,
1064        SYSTEM_FIRMWARE_DIR,
1065    };
1066
1067    int fd, fwfd;
1068    for (unsigned n = 0; n < countof(fwdirs); n++) {
1069        if ((fd = open(fwdirs[n], O_RDONLY, O_DIRECTORY)) < 0) {
1070            continue;
1071        }
1072        fwfd = openat(fd, path, O_RDONLY);
1073        close(fd);
1074        if (fwfd >= 0) {
1075            *size = lseek(fwfd, 0, SEEK_END);
1076            zx_status_t r = fdio_get_vmo_clone(fwfd, vmo);
1077            close(fwfd);
1078            return r;
1079        }
1080        if (errno != ENOENT) {
1081            return ZX_ERR_IO;
1082        }
1083    }
1084    return ZX_ERR_NOT_FOUND;
1085}
1086
// Returns true if child_path is equal to parent_path, or names something
// underneath it (i.e. parent_path followed by a '/' separator).
static bool path_is_child(const char* parent_path, const char* child_path) {
    const size_t prefix_len = strlen(parent_path);
    if (strncmp(parent_path, child_path, prefix_len) != 0) {
        return false;
    }
    // the prefix matched, so child_path is at least prefix_len chars long;
    // the next character decides between "same path", "child", and
    // "merely shares a prefix"
    const char next = child_path[prefix_len];
    return next == 0 || next == '/';
}
1093
1094static zx_status_t dc_get_metadata(device_t* dev, uint32_t type, void* buffer, size_t buflen,
1095                                   size_t* actual) {
1096    dc_metadata_t* md;
1097
1098    // search dev and its parent devices for a match
1099    device_t* test = dev;
1100    while (test) {
1101        list_for_every_entry(&test->metadata, md, dc_metadata_t, node) {
1102            if (md->type == type) {
1103                if (md->length > buflen) {
1104                    return ZX_ERR_BUFFER_TOO_SMALL;
1105                }
1106                memcpy(buffer, md->data, md->length);
1107                *actual = md->length;
1108                return ZX_OK;
1109            }
1110        }
1111        test = test->parent;
1112    }
1113
1114    // if no metadata is found, check list of metadata added via device_publish_metadata()
1115    char path[DC_PATH_MAX];
1116    zx_status_t status = dc_get_topo_path(dev, path, DC_PATH_MAX);
1117    if (status != ZX_OK) {
1118        return status;
1119    }
1120
1121    dc_metadata_t* temp;
1122    list_for_every_entry_safe(&published_metadata, md, temp, dc_metadata_t, node) {
1123        char* md_path = (char*)md->data + md->length;
1124        if (md->type == type && path_is_child(md_path, path)) {
1125            if (md->length > buflen) {
1126                return ZX_ERR_BUFFER_TOO_SMALL;
1127            }
1128            memcpy(buffer, md->data, md->length);
1129            *actual = md->length;
1130            return ZX_OK;
1131        }
1132    }
1133
1134    return ZX_ERR_NOT_FOUND;
1135}
1136
1137static zx_status_t dc_add_metadata(device_t* dev, uint32_t type, const void* data,
1138                                   uint32_t length) {
1139    auto md = static_cast<dc_metadata_t*>(calloc(1, sizeof(dc_metadata_t) + length));
1140    if (!md) {
1141        return ZX_ERR_NO_MEMORY;
1142    }
1143    new (md) dc_metadata_t;
1144
1145    md->type = type;
1146    md->length = length;
1147    memcpy(&md->data, data, length);
1148    list_add_head(&dev->metadata, &md->node);
1149    return ZX_OK;
1150}
1151
1152static zx_status_t dc_publish_metadata(device_t* dev, const char* path, uint32_t type,
1153                                       const void* data, uint32_t length) {
1154    char caller_path[DC_PATH_MAX];
1155    zx_status_t status = dc_get_topo_path(dev, caller_path, DC_PATH_MAX);
1156    if (status != ZX_OK) {
1157        return status;
1158    }
1159
1160    // Check to see if the specified path is a child of the caller's path
1161    if (path_is_child(caller_path, path)) {
1162        // Caller is adding a path that matches itself or one of its children, which is allowed.
1163    } else {
1164        // Adding metadata to arbitrary paths is restricted to drivers running in the sys devhost.
1165        while (dev && dev != &sys_device) {
1166            if (dev->proxy) {
1167                // this device is in a child devhost
1168                return ZX_ERR_ACCESS_DENIED;
1169            }
1170            dev = dev->parent;
1171        }
1172        if (!dev) {
1173            return ZX_ERR_ACCESS_DENIED;
1174        }
1175    }
1176
1177    auto md =
1178        static_cast<dc_metadata_t*>(calloc(1, sizeof(dc_metadata_t) + length + strlen(path) + 1));
1179    if (!md) {
1180        return ZX_ERR_NO_MEMORY;
1181    }
1182    new (md) dc_metadata_t;
1183
1184    md->type = type;
1185    md->length = length;
1186    md->has_path = true;
1187    memcpy(&md->data, data, length);
1188    strcpy((char*)md->data + length, path);
1189    list_add_head(&published_metadata, &md->node);
1190    return ZX_OK;
1191}
1192
// Read and dispatch one RPC message from a device's channel (dev->hrpc).
//
// At most three handles are accepted with the message. Each op validates
// the number of handles it received. Most ops finish at the `done` label,
// which writes a dc_status_t reply carrying the op's result.
// Exceptions:
//   - kRemoveDevice replies ZX_OK and returns ZX_ERR_STOP (`disconnect`).
//   - kGetTopoPath, kLoadFirmware and kGetMetadata write their own,
//     larger reply on success.
//   - kStatus is itself a reply and is never answered.
//
// Returns ZX_OK when the message was handled, ZX_ERR_INTERNAL if the
// device is already dead or the message cannot be unpacked, or the error
// from a channel read/write. The caller, dc_handle_device, force-removes
// the device on any negative return.
static zx_status_t dc_handle_device_read(device_t* dev) {
    dc_msg_t msg;
    zx_handle_t hin[3];
    uint32_t msize = sizeof(msg);
    uint32_t hcount = 3;

    if (dev->flags & DEV_CTX_DEAD) {
        log(ERROR, "devcoord: dev %p already dead (in read)\n", dev);
        return ZX_ERR_INTERNAL;
    }

    // msize/hcount are in/out: capacity going in, actual counts coming out
    zx_status_t r;
    if ((r = zx_channel_read(dev->hrpc, 0, &msg, hin,
                             msize, hcount, &msize, &hcount)) < 0) {
        return r;
    }

    const void* data;
    const char* name;
    const char* args;
    if ((r = dc_msg_unpack(&msg, msize, &data, &name, &args)) < 0) {
        // malformed message: drop whatever handles came with it
        while (hcount > 0) {
            zx_handle_close(hin[--hcount]);
        }
        return ZX_ERR_INTERNAL;
    }

    // reply skeleton used by the done/disconnect paths
    dc_status_t dcs;
    dcs.txid = msg.txid;

    switch (msg.op) {
    case dc_msg_t::Op::kAddDevice:
    case dc_msg_t::Op::kAddDeviceInvisible:
        if (hcount != 1) {
            goto fail_wrong_hcount;
        }
        if (dc_in_suspend()) {
            log(ERROR, "devcoord: rpc: add-device '%s' forbidden in suspend\n",
                name);
            r = ZX_ERR_BAD_STATE;
            goto fail_close_handles;
        }
        log(RPC_IN, "devcoord: rpc: add-device '%s' args='%s'\n", name, args);
        // on failure the handle is closed here
        if ((r = dc_add_device(dev, hin[0], &msg, name, args, data,
                               msg.op == dc_msg_t::Op::kAddDeviceInvisible)) < 0) {
            zx_handle_close(hin[0]);
        }
        break;

    case dc_msg_t::Op::kRemoveDevice:
        if (hcount != 0) {
            goto fail_wrong_hcount;
        }
        if (dc_in_suspend()) {
            log(ERROR, "devcoord: rpc: remove-device '%s' forbidden in suspend\n",
                dev->name);
            r = ZX_ERR_BAD_STATE;
            goto fail_close_handles;
        }
        log(RPC_IN, "devcoord: rpc: remove-device '%s'\n", dev->name);
        // non-forced removal: the device is left as a zombie so the reply
        // below can still be written on its channel
        dc_remove_device(dev, false);
        goto disconnect;

    case dc_msg_t::Op::kMakeVisible:
        if (hcount != 0) {
            goto fail_wrong_hcount;
        }
        if (dc_in_suspend()) {
            log(ERROR, "devcoord: rpc: make-visible '%s' forbidden in suspend\n",
                dev->name);
            r = ZX_ERR_BAD_STATE;
            goto fail_close_handles;
        }
        log(RPC_IN, "devcoord: rpc: make-visible '%s'\n", dev->name);
        // the result of dc_make_visible() is ignored; ZX_OK is always replied
        dc_make_visible(dev);
        r = ZX_OK;
        break;

    case dc_msg_t::Op::kBindDevice:
        if (hcount != 0) {
            goto fail_wrong_hcount;
        }
        if (dc_in_suspend()) {
            log(ERROR, "devcoord: rpc: bind-device '%s' forbidden in suspend\n",
                dev->name);
            r = ZX_ERR_BAD_STATE;
            goto fail_close_handles;
        }
        log(RPC_IN, "devcoord: rpc: bind-device '%s'\n", dev->name);
        r = dc_bind_device(dev, args);
        break;

    case dc_msg_t::Op::kDmCommand:
        if (hcount > 1) {
            goto fail_wrong_hcount;
        }
        // an optional handle is stashed in dmctl_socket for the duration
        // of the command and closed afterwards if it is still valid
        if (hcount == 1) {
            dmctl_socket = hin[0];
        }
        r = handle_dmctl_write(msg.datalen, static_cast<const char*>(data));
        if (dmctl_socket != ZX_HANDLE_INVALID) {
            zx_handle_close(dmctl_socket);
            dmctl_socket = ZX_HANDLE_INVALID;
        }
        break;

    case dc_msg_t::Op::kDmOpenVirtcon:
        if (hcount != 1) {
            goto fail_wrong_hcount;
        }
        // forward the received handle over virtcon_open; the write result
        // is not checked
        zx_channel_write(virtcon_open, 0, nullptr, 0, hin, 1);
        r = ZX_OK;
        break;

    case dc_msg_t::Op::kDmWatch:
        if (hcount != 1) {
            goto fail_wrong_hcount;
        }
        dc_watch(hin[0]);
        r = ZX_OK;
        break;

    case dc_msg_t::Op::kDmMexec:
        if (hcount != 2) {
            // NOTE(review): hcount is a uint32_t but is formatted with %d
            log(ERROR, "devcoord: rpc: mexec wrong hcount %d\n", hcount);
            goto fail_wrong_hcount;
        }
        dc_mexec(hin);
        r = ZX_OK;
        break;

    case dc_msg_t::Op::kGetTopoPath: {
        if (hcount != 0) {
            goto fail_wrong_hcount;
        }
        struct {
            dc_status_t rsp;
            char path[DC_PATH_MAX];
        } reply;
        // on failure fall through to the plain status reply at `done`
        if ((r = dc_get_topo_path(dev, reply.path, DC_PATH_MAX)) < 0) {
            break;
        }
        reply.rsp.status = ZX_OK;
        reply.rsp.txid = msg.txid;
        if ((r = zx_channel_write(dev->hrpc, 0, &reply, sizeof(reply), nullptr, 0)) < 0) {
            return r;
        }
        return ZX_OK;
    }
    case dc_msg_t::Op::kLoadFirmware: {
        if (hcount != 0) {
            goto fail_wrong_hcount;
        }
        zx_handle_t vmo;
        struct {
            dc_status_t rsp;
            size_t size;
        } reply;
        // on failure fall through to the plain status reply at `done`
        if ((r = dc_load_firmware(dev, args, &vmo, &reply.size)) < 0) {
            break;
        }
        reply.rsp.status = ZX_OK;
        reply.rsp.txid = msg.txid;
        // the firmware VMO travels with the reply
        if ((r = zx_channel_write(dev->hrpc, 0, &reply, sizeof(reply), &vmo, 1)) < 0) {
            return r;
        }
        return ZX_OK;
    }
    case dc_msg_t::Op::kStatus: {
        if (hcount != 0) {
            goto fail_wrong_hcount;
        }
        // all of these return directly and do not write a
        // reply, since this message is a reply itself
        // a status message is matched with the oldest pending request
        pending_t* pending = list_remove_head_type(&dev->pending, pending_t, node);
        if (pending == nullptr) {
            log(ERROR, "devcoord: rpc: spurious status message\n");
            return ZX_OK;
        }
        switch (pending->op) {
        case dc_pending::Op::kBind:
            if (msg.status != ZX_OK) {
                log(ERROR, "devcoord: rpc: bind-driver '%s' status %d\n",
                    dev->name, msg.status);
            } else {
                dc_notify(dev, DEVMGR_OP_DEVICE_CHANGED);
            }
            //TODO: try next driver, clear BOUND flag
            break;
        case dc_pending::Op::kSuspend: {
            if (msg.status != ZX_OK) {
                log(ERROR, "devcoord: rpc: suspend '%s' status %d\n",
                    dev->name, msg.status);
            }
            // record the devhost's result and move the suspend forward
            auto ctx = static_cast<suspend_context_t*>(pending->ctx);
            ctx->status = msg.status;
            dc_continue_suspend(ctx);
            break;
        }
        }
        free(pending);
        return ZX_OK;
    }
    case dc_msg_t::Op::kGetMetadata: {
        if (hcount != 0) {
            goto fail_wrong_hcount;
        }
        struct {
            dc_status_t rsp;
            uint8_t data[DC_MAX_DATA];
        } reply;
        size_t actual = 0;
        reply.rsp.status = dc_get_metadata(dev, msg.value, &reply.data, sizeof(reply.data),
                                           &actual);
        reply.rsp.txid = msg.txid;
        // only the status header plus the bytes actually produced are sent
        uint32_t reply_size = static_cast<uint32_t>(sizeof(reply.rsp) + actual);
        return zx_channel_write(dev->hrpc, 0, &reply, reply_size, nullptr, 0);
    }
    case dc_msg_t::Op::kAddMetadata: {
        if (hcount != 0) {
            goto fail_wrong_hcount;
        }
        r = dc_add_metadata(dev, msg.value, data, msg.datalen);
        break;
    }
    case dc_msg_t::Op::kPublishMetadata: {
        if (hcount != 0) {
            goto fail_wrong_hcount;
        }
        r = dc_publish_metadata(dev, args, msg.value, data, msg.datalen);
        break;
    }
    default:
        log(ERROR, "devcoord: invalid rpc op %08x\n", static_cast<uint32_t>(msg.op));
        r = ZX_ERR_NOT_SUPPORTED;
        goto fail_close_handles;
    }

// common exit: send the op's result back as a bare status reply
done:
    dcs.status = r;
    if ((r = zx_channel_write(dev->hrpc, 0, &dcs, sizeof(dcs), nullptr, 0)) < 0) {
        return r;
    }
    return ZX_OK;

// acknowledge (write result ignored), then return ZX_ERR_STOP
disconnect:
    dcs.status = ZX_OK;
    zx_channel_write(dev->hrpc, 0, &dcs, sizeof(dcs), nullptr, 0);
    return ZX_ERR_STOP;

// error exits: close every received handle, then reply with the error
fail_wrong_hcount:
    r = ZX_ERR_INVALID_ARGS;
fail_close_handles:
    while (hcount > 0) {
        zx_handle_close(hin[--hcount]);
    }
    goto done;
}
1451
// Recover the device_t that embeds a given port handler as its |ph| member.
// The macro parameter must not itself be named |ph|: it would also be
// substituted into the member-name argument of containerof(), making the
// macro work only for arguments spelled exactly "ph".
#define dev_from_ph(handler) containerof(handler, device_t, ph)
1453
1454// handle inbound RPCs from devhost to devices
1455static zx_status_t dc_handle_device(port_handler_t* ph, zx_signals_t signals, uint32_t evt) {
1456    device_t* dev = dev_from_ph(ph);
1457
1458    if (signals & ZX_CHANNEL_READABLE) {
1459        zx_status_t r;
1460        if ((r = dc_handle_device_read(dev)) < 0) {
1461            if (r != ZX_ERR_STOP) {
1462                log(ERROR, "devcoord: device %p name='%s' rpc status: %d\n",
1463                    dev, dev->name, r);
1464            }
1465            dc_remove_device(dev, true);
1466            return ZX_ERR_STOP;
1467        }
1468        return ZX_OK;
1469    }
1470    if (signals & ZX_CHANNEL_PEER_CLOSED) {
1471        log(ERROR, "devcoord: device %p name='%s' disconnected!\n", dev, dev->name);
1472        dc_remove_device(dev, true);
1473        return ZX_ERR_STOP;
1474    }
1475    log(ERROR, "devcoord: no work? %08x\n", signals);
1476    return ZX_OK;
1477}
1478
1479// send message to devhost, requesting the creation of a device
1480static zx_status_t dh_create_device(device_t* dev, devhost_t* dh,
1481                                    const char* args, zx_handle_t rpc_proxy) {
1482    dc_msg_t msg;
1483    uint32_t mlen;
1484    zx_status_t r;
1485
1486    if ((r = dc_msg_pack(&msg, &mlen, nullptr, 0, dev->libname, args)) < 0) {
1487        return r;
1488    }
1489
1490    uint32_t hcount = 0;
1491    zx_handle_t handle[3], hrpc;
1492    if ((r = zx_channel_create(0, handle, &hrpc)) < 0) {
1493        return r;
1494    }
1495    hcount++;
1496
1497    if (dev->libname[0]) {
1498        if ((r = libname_to_vmo(dev->libname, handle + 1)) < 0) {
1499            goto fail;
1500        }
1501        hcount++;
1502        msg.op = dc_msg_t::Op::kCreateDevice;
1503    } else {
1504        msg.op = dc_msg_t::Op::kCreateDeviceStub;
1505    }
1506
1507    if (rpc_proxy) {
1508        handle[hcount++] = rpc_proxy;
1509    }
1510
1511    msg.txid = 0;
1512    msg.protocol_id = dev->protocol_id;
1513
1514    if ((r = zx_channel_write(dh->hrpc, 0, &msg, mlen, handle, hcount)) < 0) {
1515        goto fail_after_write;
1516    }
1517
1518    dev->hrpc = hrpc;
1519    dev->ph.handle = hrpc;
1520    dev->ph.waitfor = ZX_CHANNEL_READABLE | ZX_CHANNEL_PEER_CLOSED;
1521    dev->ph.func = dc_handle_device;
1522    if ((r = port_wait(&dc_port, &dev->ph)) < 0) {
1523        goto fail_after_write;
1524    }
1525    dev->host = dh;
1526    dh->refcount++;
1527    list_add_tail(&dh->devices, &dev->dhnode);
1528    return ZX_OK;
1529
1530fail:
1531    zx_handle_close_many(handle, hcount);
1532fail_after_write:
1533    zx_handle_close(hrpc);
1534    return r;
1535}
1536
1537static zx_status_t dc_create_proxy(device_t* parent) {
1538    if (parent->proxy != nullptr) {
1539        return ZX_OK;
1540    }
1541
1542    size_t namelen = strlen(parent->name);
1543    size_t liblen = strlen(parent->libname);
1544    size_t devlen = sizeof(device_t) + namelen + liblen + 2;
1545
1546    // non-immortal devices, use foo.proxy.so for
1547    // their proxy devices instead of foo.so
1548    bool proxylib = !(parent->flags & DEV_CTX_IMMORTAL);
1549
1550    if (proxylib) {
1551        if (liblen < 3) {
1552            return ZX_ERR_INTERNAL;
1553        }
1554        // space for ".proxy"
1555        devlen += 6;
1556    }
1557
1558    device_t* dev = static_cast<device_t*>(calloc(1, devlen));
1559    if (dev == nullptr) {
1560        return ZX_ERR_NO_MEMORY;
1561    }
1562    new (dev) device_t;
1563    char* text = (char*) (dev + 1);
1564    memcpy(text, parent->name, namelen + 1);
1565    dev->name = text;
1566    text += namelen + 1;
1567    memcpy(text, parent->libname, liblen + 1);
1568    if (proxylib) {
1569        memcpy(text + liblen - 3, ".proxy.so", 10);
1570    }
1571    dev->libname = text;
1572
1573    list_initialize(&dev->children);
1574    list_initialize(&dev->pending);
1575    list_initialize(&dev->metadata);
1576    dev->flags = DEV_CTX_PROXY;
1577    dev->protocol_id = parent->protocol_id;
1578    dev->parent = parent;
1579    dev->refcount = 1;
1580    parent->proxy = dev;
1581    parent->refcount++;
1582    log(DEVLC, "devcoord: dev %p name='%s' ++ref=%d (proxy)\n",
1583        parent, parent->name, parent->refcount);
1584    return ZX_OK;
1585}
1586
1587// send message to devhost, requesting the binding of a driver to a device
1588static zx_status_t dh_bind_driver(device_t* dev, const char* libname) {
1589    dc_msg_t msg;
1590    uint32_t mlen;
1591
1592    pending_t* pending = static_cast<pending_t*>(malloc(sizeof(pending_t)));
1593    if (pending == nullptr) {
1594        return ZX_ERR_NO_MEMORY;
1595    }
1596    new (pending) pending_t;
1597
1598    zx_status_t r;
1599    if ((r = dc_msg_pack(&msg, &mlen, nullptr, 0, libname, nullptr)) < 0) {
1600        free(pending);
1601        return r;
1602    }
1603
1604    zx_handle_t vmo;
1605    if ((r = libname_to_vmo(libname, &vmo)) < 0) {
1606        free(pending);
1607        return r;
1608    }
1609
1610    msg.txid = 0;
1611    msg.op = dc_msg_t::Op::kBindDriver;
1612
1613    if ((r = zx_channel_write(dev->hrpc, 0, &msg, mlen, &vmo, 1)) < 0) {
1614        free(pending);
1615        return r;
1616    }
1617
1618    dev->flags |= DEV_CTX_BOUND;
1619    pending->op = dc_pending::Op::kBind;
1620    pending->ctx = nullptr;
1621    list_add_tail(&dev->pending, &pending->node);
1622    return ZX_OK;
1623}
1624
1625static zx_status_t dh_connect_proxy(device_t* dev, zx_handle_t h) {
1626    dc_msg_t msg;
1627    uint32_t mlen;
1628    zx_status_t r;
1629    if ((r = dc_msg_pack(&msg, &mlen, nullptr, 0, nullptr, nullptr)) < 0) {
1630        zx_handle_close(h);
1631        return r;
1632    }
1633    msg.txid = 0;
1634    msg.op = dc_msg_t::Op::kConnectProxy;
1635    return zx_channel_write(dev->hrpc, 0, &msg, mlen, &h, 1);
1636}
1637
1638static zx_status_t dc_prepare_proxy(device_t* dev) {
1639    if (dev->flags & DEV_CTX_PROXY) {
1640        log(ERROR, "devcoord: cannot proxy a proxy: %s\n", dev->name);
1641        return ZX_ERR_INTERNAL;
1642    }
1643
1644    // proxy args are "processname,args"
1645    const char* arg0 = dev->args;
1646    const char* arg1 = strchr(arg0, ',');
1647    if (arg1 == nullptr) {
1648        return ZX_ERR_INTERNAL;
1649    }
1650    size_t arg0len = arg1 - arg0;
1651    arg1++;
1652
1653    char devhostname[32];
1654    snprintf(devhostname, sizeof(devhostname), "devhost:%.*s", (int) arg0len, arg0);
1655
1656    zx_status_t r;
1657    if ((r = dc_create_proxy(dev)) < 0) {
1658        log(ERROR, "devcoord: cannot create proxy device: %d\n", r);
1659        return r;
1660    }
1661
1662    // if this device has no devhost, first instantiate it
1663    if (dev->proxy->host == nullptr) {
1664        zx_handle_t h0 = ZX_HANDLE_INVALID, h1 = ZX_HANDLE_INVALID;
1665
1666        // the immortal root devices do not provide proxy rpc
1667        bool need_proxy_rpc = !(dev->flags & DEV_CTX_IMMORTAL);
1668
1669        if (need_proxy_rpc) {
1670            // create rpc channel for proxy device to talk to the busdev it proxys
1671            if ((r = zx_channel_create(0, &h0, &h1)) < 0) {
1672                log(ERROR, "devcoord: cannot create proxy rpc channel: %d\n", r);
1673                return r;
1674            }
1675        } else if (dev == &sys_device) {
1676            // pass bootdata VMO handle to sys device
1677            h1 = bootdata_vmo;
1678        }
1679        if ((r = dc_new_devhost(devhostname, dev->host,
1680                                &dev->proxy->host)) < 0) {
1681            log(ERROR, "devcoord: dc_new_devhost: %d\n", r);
1682            zx_handle_close(h0);
1683            zx_handle_close(h1);
1684            return r;
1685        }
1686        if ((r = dh_create_device(dev->proxy, dev->proxy->host, arg1, h1)) < 0) {
1687            log(ERROR, "devcoord: dh_create_device: %d\n", r);
1688            zx_handle_close(h0);
1689            return r;
1690        }
1691        if (need_proxy_rpc) {
1692            if ((r = dh_connect_proxy(dev, h0)) < 0) {
1693                log(ERROR, "devcoord: dh_connect_proxy: %d\n", r);
1694            }
1695        }
1696    }
1697
1698    return ZX_OK;
1699}
1700
1701static zx_status_t dc_attempt_bind(driver_t* drv, device_t* dev) {
1702    // cannot bind driver to already bound device
1703    if ((dev->flags & DEV_CTX_BOUND) && (!(dev->flags & DEV_CTX_MULTI_BIND))) {
1704        return ZX_ERR_BAD_STATE;
1705    }
1706    if (!(dev->flags & DEV_CTX_MUST_ISOLATE)) {
1707        // non-busdev is pretty simple
1708        if (dev->host == nullptr) {
1709            log(ERROR, "devcoord: can't bind to device without devhost\n");
1710            return ZX_ERR_BAD_STATE;
1711        }
1712        return dh_bind_driver(dev, drv->libname);
1713    }
1714
1715    zx_status_t r;
1716    if ((r = dc_prepare_proxy(dev)) < 0) {
1717        return r;
1718    }
1719
1720    r = dh_bind_driver(dev->proxy, drv->libname);
1721    //TODO(swetland): arrange to mark us unbound when the proxy (or its devhost) goes away
1722    if ((r == ZX_OK) && !(dev->flags & DEV_CTX_MULTI_BIND)) {
1723        dev->flags |= DEV_CTX_BOUND;
1724    }
1725    return r;
1726}
1727
1728static void dc_handle_new_device(device_t* dev) {
1729    driver_t* drv;
1730
1731    list_for_every_entry(&list_drivers, drv, driver_t, node) {
1732        if (dc_is_bindable(drv, dev->protocol_id,
1733                           dev->props, dev->prop_count, true)) {
1734            log(SPEW, "devcoord: drv='%s' bindable to dev='%s'\n",
1735                drv->name, dev->name);
1736
1737            dc_attempt_bind(drv, dev);
1738            if (!(dev->flags & DEV_CTX_MULTI_BIND)) {
1739                break;
1740            }
1741        }
1742    }
1743}
1744
1745static void dc_suspend_fallback(uint32_t flags) {
1746    log(INFO, "devcoord: suspend fallback with flags 0x%08x\n", flags);
1747    if (flags == DEVICE_SUSPEND_FLAG_REBOOT) {
1748        zx_system_powerctl(get_root_resource(), ZX_SYSTEM_POWERCTL_REBOOT, nullptr);
1749    } else if (flags == DEVICE_SUSPEND_FLAG_REBOOT_BOOTLOADER) {
1750        zx_system_powerctl(get_root_resource(), ZX_SYSTEM_POWERCTL_REBOOT_BOOTLOADER, nullptr);
1751    } else if (flags == DEVICE_SUSPEND_FLAG_REBOOT_RECOVERY) {
1752        zx_system_powerctl(get_root_resource(), ZX_SYSTEM_POWERCTL_REBOOT_RECOVERY, nullptr);
1753    } else if (flags == DEVICE_SUSPEND_FLAG_POWEROFF) {
1754        zx_system_powerctl(get_root_resource(), ZX_SYSTEM_POWERCTL_SHUTDOWN, nullptr);
1755    }
1756}
1757
1758static zx_status_t dc_suspend_devhost(devhost_t* dh, suspend_context_t* ctx) {
1759    device_t* dev = list_peek_head_type(&dh->devices, device_t, dhnode);
1760    if (!dev) {
1761        return ZX_OK;
1762    }
1763
1764    if (!(dev->flags & DEV_CTX_PROXY)) {
1765        log(INFO, "devcoord: devhost root '%s' (%p) is not a proxy\n",
1766            dev->name, dev);
1767        return ZX_ERR_BAD_STATE;
1768    }
1769
1770    log(DEVLC, "devcoord: suspend devhost %p device '%s' (%p)\n",
1771        dh, dev->name, dev);
1772
1773    zx_handle_t rpc = ZX_HANDLE_INVALID;
1774
1775    pending_t* pending = static_cast<pending_t*>(malloc(sizeof(pending_t)));
1776    if (pending == nullptr) {
1777        return ZX_ERR_NO_MEMORY;
1778    }
1779    new (pending) pending_t;
1780
1781    dc_msg_t msg;
1782    uint32_t mlen;
1783    zx_status_t r;
1784    if ((r = dc_msg_pack(&msg, &mlen, nullptr, 0, nullptr, nullptr)) < 0) {
1785        free(pending);
1786        return r;
1787    }
1788    msg.txid = 0;
1789    msg.op = dc_msg_t::Op::kSuspend;
1790    msg.value = ctx->sflags;
1791    rpc = dev->hrpc;
1792    if ((r = zx_channel_write(rpc, 0, &msg, mlen, nullptr, 0)) != ZX_OK) {
1793        free(pending);
1794        return r;
1795    }
1796
1797    dh->flags |= DEV_HOST_SUSPEND;
1798    pending->op = dc_pending::Op::kSuspend;
1799    pending->ctx = ctx;
1800    list_add_tail(&dev->pending, &pending->node);
1801
1802    ctx->count += 1;
1803
1804    return ZX_OK;
1805}
1806
1807static void append_suspend_list(suspend_context_t* ctx, devhost_t* dh) {
1808    // suspend order is children first
1809    devhost_t* child = nullptr;
1810    list_for_every_entry(&dh->children, child, devhost_t, node) {
1811        list_add_head(&ctx->devhosts, &child->snode);
1812    }
1813    list_for_every_entry(&dh->children, child, devhost_t, node) {
1814        append_suspend_list(ctx, child);
1815    }
1816}
1817
1818static void build_suspend_list(suspend_context_t* ctx) {
1819    // sys_device must suspend last as on x86 it invokes
1820    // ACPI S-state transition
1821    list_add_head(&ctx->devhosts, &sys_device.proxy->host->snode);
1822    append_suspend_list(ctx, sys_device.proxy->host);
1823    list_add_head(&ctx->devhosts, &root_device.proxy->host->snode);
1824    append_suspend_list(ctx, root_device.proxy->host);
1825    list_add_head(&ctx->devhosts, &misc_device.proxy->host->snode);
1826    append_suspend_list(ctx, misc_device.proxy->host);
1827    // test devices do not (yet) participate in suspend
1828}
1829
1830static void process_suspend_list(suspend_context_t* ctx) {
1831    devhost_t* dh = ctx->dh;
1832    devhost_t* parent = nullptr;
1833    do {
1834        if (!parent || (dh->parent == parent)) {
1835            // send dc_msg_t::Op::kSuspend each set of children of a devhost at a time,
1836            // since they can run in parallel
1837            dc_suspend_devhost(dh, &suspend_ctx);
1838            parent = dh->parent;
1839        } else {
1840            // if the parent is different than the previous devhost's
1841            // parent, either this devhost is the parent, a child of
1842            // its parent's sibling, or the parent's sibling, so stop
1843            // processing until all the outstanding suspends are done
1844            parent = nullptr;
1845            break;
1846        }
1847    } while ((dh = list_next_type(&ctx->devhosts, &dh->snode,
1848                                  devhost_t, snode)) != nullptr);
1849    // next devhost to process once all the outstanding suspends are done
1850    ctx->dh = dh;
1851}
1852
1853static bool check_pending(device_t* dev) {
1854    pending_t* pending;
1855    if (dev->proxy) {
1856        pending = list_peek_tail_type(&dev->proxy->pending, pending_t, node);
1857    } else {
1858        pending = list_peek_tail_type(&dev->pending, pending_t, node);
1859    }
1860    if ((pending == nullptr) || (pending->op != dc_pending::Op::kSuspend)) {
1861        return false;
1862    } else {
1863        log(ERROR, "  devhost with device '%s' timed out\n", dev->name);
1864        return true;
1865    }
1866}
1867
1868static int suspend_timeout_thread(void* arg) {
1869    // 10 seconds
1870    zx_nanosleep(zx_deadline_after(ZX_SEC(10)));
1871
1872    auto ctx = static_cast<suspend_context_t*>(arg);
1873    if (suspend_debug) {
1874        if (ctx->flags == RUNNING) {
1875            return 0; // success
1876        }
1877        log(ERROR, "devcoord: suspend time out\n");
1878        log(ERROR, "  sflags: 0x%08x\n", ctx->sflags);
1879        device_t* dev;
1880        list_for_every_entry(&list_devices, dev, device_t, anode) {
1881            check_pending(dev);
1882        }
1883        check_pending(&root_device);
1884        check_pending(&misc_device);
1885        check_pending(&sys_device);
1886    }
1887    if (suspend_fallback) {
1888        dc_suspend_fallback(ctx->sflags);
1889    }
1890    return 0;
1891}
1892
1893static void dc_suspend(uint32_t flags) {
1894    // these top level devices should all have proxies. if not,
1895    // the system hasn't fully initialized yet and cannot go to
1896    // suspend.
1897    if (!sys_device.proxy || !root_device.proxy || !misc_device.proxy) {
1898        return;
1899    }
1900
1901    suspend_context_t* ctx = &suspend_ctx;
1902    if (ctx->flags) {
1903        return;
1904    }
1905    memset(ctx, 0, sizeof(*ctx));
1906    ctx->status = ZX_OK;
1907    ctx->flags = SUSPEND;
1908    ctx->sflags = flags;
1909    ctx->socket = dmctl_socket;
1910    dmctl_socket = ZX_HANDLE_INVALID;   // to prevent the rpc handler from closing this handle
1911    list_initialize(&ctx->devhosts);
1912
1913    build_suspend_list(ctx);
1914
1915    if (suspend_fallback || suspend_debug) {
1916        thrd_t t;
1917        int ret = thrd_create_with_name(&t, suspend_timeout_thread, ctx,
1918                                        "devcoord-suspend-timeout");
1919        if (ret != thrd_success) {
1920            log(ERROR, "devcoord: can't create suspend timeout thread\n");
1921        }
1922    }
1923
1924    ctx->dh = list_peek_head_type(&ctx->devhosts, devhost_t, snode);
1925    process_suspend_list(ctx);
1926}
1927
1928static void dc_mexec(zx_handle_t* h) {
1929    // these top level devices should all have proxies. if not,
1930    // the system hasn't fully initialized yet and cannot mexec.
1931    if (!sys_device.proxy || !root_device.proxy || !misc_device.proxy) {
1932        return;
1933    }
1934
1935    suspend_context_t* ctx = &suspend_ctx;
1936    if (ctx->flags) {
1937        return;
1938    }
1939    memset(ctx, 0, sizeof(*ctx));
1940    ctx->status = ZX_OK;
1941    ctx->flags = SUSPEND;
1942    ctx->sflags = DEVICE_SUSPEND_FLAG_MEXEC;
1943    list_initialize(&ctx->devhosts);
1944
1945    ctx->kernel = *h;
1946    ctx->bootdata = *(h + 1);
1947
1948    build_suspend_list(ctx);
1949
1950    ctx->dh = list_peek_head_type(&ctx->devhosts, devhost_t, snode);
1951    process_suspend_list(ctx);
1952}
1953
1954static void dc_continue_suspend(suspend_context_t* ctx) {
1955    if (ctx->status != ZX_OK) {
1956        // TODO: unroll suspend
1957        // do not continue to suspend as this indicates a driver suspend
1958        // problem and should show as a bug
1959        log(ERROR, "devcoord: failed to suspend\n");
1960        // notify dmctl
1961        if (ctx->socket) {
1962            zx_handle_close(ctx->socket);
1963        }
1964        if (ctx->sflags == DEVICE_SUSPEND_FLAG_MEXEC) {
1965            zx_object_signal(ctx->kernel, 0, ZX_USER_SIGNAL_0);
1966        }
1967        ctx->flags = 0;
1968        return;
1969    }
1970
1971    ctx->count -= 1;
1972    if (ctx->count == 0) {
1973        if (ctx->dh != nullptr) {
1974            process_suspend_list(ctx);
1975        } else if (ctx->sflags == DEVICE_SUSPEND_FLAG_MEXEC) {
1976            zx_system_mexec(get_root_resource(), ctx->kernel, ctx->bootdata);
1977        } else {
1978            // should never get here on x86
1979            // on arm, if the platform driver does not implement
1980            // suspend go to the kernel fallback
1981            dc_suspend_fallback(ctx->sflags);
1982            // this handle is leaked on the shutdown path for x86
1983            if (ctx->socket) {
1984                zx_handle_close(ctx->socket);
1985            }
1986            // if we get here the system did not suspend successfully
1987            ctx->flags = RUNNING;
1988        }
1989    }
1990}
1991
1992// device binding program that pure (parentless)
1993// misc devices use to get published in the misc devhost
1994static struct zx_bind_inst misc_device_binding =
1995    BI_MATCH_IF(EQ, BIND_PROTOCOL, ZX_PROTOCOL_MISC_PARENT);
1996
1997static bool is_misc_driver(driver_t* drv) {
1998    return (drv->binding_size == sizeof(misc_device_binding)) &&
1999        (memcmp(&misc_device_binding, drv->binding, sizeof(misc_device_binding)) == 0);
2000}
2001
2002// device binding program that pure (parentless)
2003// test devices use to get published in the test devhost
2004static struct zx_bind_inst test_device_binding =
2005    BI_MATCH_IF(EQ, BIND_PROTOCOL, ZX_PROTOCOL_TEST_PARENT);
2006
2007static bool is_test_driver(driver_t* drv) {
2008    return (drv->binding_size == sizeof(test_device_binding)) &&
2009        (memcmp(&test_device_binding, drv->binding, sizeof(test_device_binding)) == 0);
2010}
2011
2012
2013// device binding program that special root-level
2014// devices use to get published in the root devhost
2015static struct zx_bind_inst root_device_binding =
2016    BI_MATCH_IF(EQ, BIND_PROTOCOL, ZX_PROTOCOL_ROOT);
2017
2018static bool is_root_driver(driver_t* drv) {
2019    return (drv->binding_size == sizeof(root_device_binding)) &&
2020        (memcmp(&root_device_binding, drv->binding, sizeof(root_device_binding)) == 0);
2021}
2022
2023// dc_driver_added_init is called from driver enumeration during
2024// startup and before the devcoordinator starts running.  Enumerated
2025// drivers are added directly to the all-drivers or fallback list.
2026//
2027// TODO: fancier priorities
2028static void dc_driver_added_init(driver_t* drv, const char* version) {
2029    if (version[0] == '*') {
2030        // fallback driver, load only if all else fails
2031        list_add_tail(&list_drivers_fallback, &drv->node);
2032    } else if (version[0] == '!') {
2033        // debugging / development hack
2034        // prioritize drivers with version "!..." over others
2035        list_add_head(&list_drivers, &drv->node);
2036    } else {
2037        list_add_tail(&list_drivers, &drv->node);
2038    }
2039}
2040
2041static work_t new_driver_work;
2042
2043// dc_driver_added is called when a driver is added after the
2044// devcoordinator has started.  The driver is added to the new-drivers
2045// list and work is queued to process it.
2046static void dc_driver_added(driver_t* drv, const char* version) {
2047    list_add_tail(&list_drivers_new, &drv->node);
2048    if (new_driver_work.op == dc_work::Op::kIdle) {
2049        queue_work(&new_driver_work, dc_work::Op::kDriverAdded, 0);
2050    }
2051}
2052
2053device_t* coordinator_init(zx_handle_t root_job) {
2054    printf("coordinator_init()\n");
2055
2056    zx_status_t status = zx_job_create(root_job, 0u, &devhost_job);
2057    if (status < 0) {
2058        log(ERROR, "devcoord: unable to create devhost job\n");
2059    }
2060    static const zx_policy_basic_t policy[] = {
2061        { ZX_POL_BAD_HANDLE, ZX_POL_ACTION_EXCEPTION },
2062    };
2063    status = zx_job_set_policy(devhost_job, ZX_JOB_POL_RELATIVE,
2064                               ZX_JOB_POL_BASIC, &policy, countof(policy));
2065    if (status < 0) {
2066        log(ERROR, "devcoord: zx_job_set_policy() failed\n");
2067    }
2068    zx_object_set_property(devhost_job, ZX_PROP_NAME, "zircon-drivers", 15);
2069
2070    port_init(&dc_port);
2071
2072    return &root_device;
2073}
2074
2075// dc_bind_driver is called when a new driver becomes available to
2076// the devcoordinator.  Existing devices are inspected to see if the
2077// new driver is bindable to them (unless they are already bound).
2078void dc_bind_driver(driver_t* drv) {
2079    if (dc_running) {
2080        printf("devcoord: driver '%s' added\n", drv->name);
2081    }
2082    if (is_root_driver(drv)) {
2083        dc_attempt_bind(drv, &root_device);
2084    } else if (is_misc_driver(drv)) {
2085        dc_attempt_bind(drv, &misc_device);
2086    } else if (is_test_driver(drv)) {
2087        dc_attempt_bind(drv, &test_device);
2088    } else if (dc_running) {
2089        device_t* dev;
2090        list_for_every_entry(&list_devices, dev, device_t, anode) {
2091            if (dev->flags & (DEV_CTX_BOUND | DEV_CTX_DEAD |
2092                              DEV_CTX_ZOMBIE | DEV_CTX_INVISIBLE)) {
2093                // if device is already bound or being destroyed or invisible, skip it
2094                continue;
2095            }
2096            if (dc_is_bindable(drv, dev->protocol_id,
2097                               dev->props, dev->prop_count, true)) {
2098                log(INFO, "devcoord: drv='%s' bindable to dev='%s'\n",
2099                    drv->name, dev->name);
2100
2101                dc_attempt_bind(drv, dev);
2102            }
2103        }
2104    }
2105}
2106
2107void dc_handle_new_driver() {
2108    driver_t* drv;
2109    while ((drv = list_remove_head_type(&list_drivers_new, driver_t, node)) != nullptr) {
2110        list_add_tail(&list_drivers, &drv->node);
2111        dc_bind_driver(drv);
2112    }
2113}
2114
// Event codes queued on dc_port for control_handler and handled by
// dc_control_event(): CTL_SCAN_SYSTEM starts the system driver scan,
// CTL_ADD_SYSTEM moves the scanned drivers onto the new-drivers list.
#define CTL_SCAN_SYSTEM 1
#define CTL_ADD_SYSTEM 2

// Set by load_system_drivers(); consulted by coordinator() at startup.
static bool system_available;
// Set when a CTL_SCAN_SYSTEM event has kicked off the system driver scan,
// so that the scan is started at most once.
static bool system_loaded;

// List of drivers loaded from /system by system_driver_loader()
static list_node_t list_drivers_system = LIST_INITIAL_VALUE(list_drivers_system);

// Thread entry point for the system driver scan (defined further down).
static int system_driver_loader(void* arg);
2125
2126static zx_status_t dc_control_event(port_handler_t* ph, zx_signals_t signals, uint32_t evt) {
2127    switch (evt) {
2128    case CTL_SCAN_SYSTEM:
2129        if (!system_loaded) {
2130            system_loaded = true;
2131            // Fire up a thread to scan/load system drivers
2132            // This avoids deadlocks between the devhosts hosting the block devices
2133            // that these drivers may be served from and the devcoordinator loading them.
2134            thrd_t t;
2135            thrd_create_with_name(&t, system_driver_loader, nullptr, "system-driver-loader");
2136        }
2137        break;
2138    case CTL_ADD_SYSTEM: {
2139        driver_t* drv;
2140        // Add system drivers to the new list
2141        while ((drv = list_remove_head_type(&list_drivers_system, driver_t, node)) != nullptr) {
2142            list_add_tail(&list_drivers_new, &drv->node);
2143        }
2144        // Add any remaining fallback drivers to the new list
2145        while ((drv = list_remove_tail_type(&list_drivers_fallback, driver_t, node)) != nullptr) {
2146            printf("devcoord: fallback driver '%s' is available\n", drv->name);
2147            list_add_tail(&list_drivers_new, &drv->node);
2148        }
2149        // Queue Driver Added work if not already queued
2150        if (new_driver_work.op == dc_work::Op::kIdle) {
2151            queue_work(&new_driver_work, dc_work::Op::kDriverAdded, 0);
2152        }
2153        break;
2154    }
2155    }
2156    return ZX_OK;
2157}
2158
2159static port_handler_t control_handler = []() {
2160    port_handler_t handler;
2161    handler.func = dc_control_event;
2162    return handler;
2163}();
2164
2165// Drivers added during system scan (from the dedicated thread)
2166// are added to list_drivers_system for bulk processing once
2167// CTL_ADD_SYSTEM is sent.
2168//
2169// TODO: fancier priority management
2170static void dc_driver_added_sys(driver_t* drv, const char* version) {
2171    log(INFO, "devmgr: adding system driver '%s' '%s'\n", drv->name, drv->libname);
2172
2173    if (load_vmo(drv->libname, &drv->dso_vmo)) {
2174        log(ERROR, "devmgr: system driver '%s' '%s' could not cache DSO\n", drv->name, drv->libname);
2175    }
2176    if (version[0] == '*') {
2177        // de-prioritize drivers that are "fallback"
2178        list_add_tail(&list_drivers_system, &drv->node);
2179    } else {
2180        list_add_head(&list_drivers_system, &drv->node);
2181    }
2182}
2183
2184static int system_driver_loader(void* arg) {
2185    find_loadable_drivers("/system/driver", dc_driver_added_sys);
2186    find_loadable_drivers("/system/lib/driver", dc_driver_added_sys);
2187    port_queue(&dc_port, &control_handler, CTL_ADD_SYSTEM);
2188    return 0;
2189}
2190
// Records that /system is available and queues a CTL_SCAN_SYSTEM event on
// dc_port so that dc_control_event() starts the system driver scan.
void load_system_drivers() {
    // coordinator() checks this flag for the case where this runs before
    // the coordinator has started
    system_available = true;
    port_queue(&dc_port, &control_handler, CTL_SCAN_SYSTEM);
}
2195
2196void coordinator() {
2197    log(INFO, "devmgr: coordinator()\n");
2198
2199    if (getenv_bool("devmgr.verbose", false)) {
2200        log_flags |= LOG_DEVLC;
2201    }
2202
2203    suspend_fallback = getenv_bool("devmgr.suspend-timeout-fallback", false);
2204    suspend_debug = getenv_bool("devmgr.suspend-timeout-debug", false);
2205
2206    dc_asan_drivers = getenv_bool("devmgr.devhost.asan", false);
2207
2208    devfs_publish(&root_device, &misc_device);
2209    devfs_publish(&root_device, &sys_device);
2210    devfs_publish(&root_device, &test_device);
2211
2212    find_loadable_drivers("/boot/driver", dc_driver_added_init);
2213    find_loadable_drivers("/boot/driver/test", dc_driver_added_init);
2214    find_loadable_drivers("/boot/lib/driver", dc_driver_added_init);
2215
2216    // Special case early handling for the ramdisk boot
2217    // path where /system is present before the coordinator
2218    // starts.  This avoids breaking the "priority hack" and
2219    // can be removed once the real driver priority system
2220    // exists.
2221    if (system_available) {
2222        dc_control_event(&control_handler, 0, CTL_SCAN_SYSTEM);
2223    }
2224
2225    // x86 platforms use acpi as the system device
2226    // all other platforms use the platform bus
2227#if defined(__x86_64__)
2228    sys_device.libname = "/boot/driver/bus-acpi.so";
2229#else
2230    sys_device.libname = "/boot/driver/platform-bus.so";
2231#endif
2232    dc_prepare_proxy(&sys_device);
2233    dc_prepare_proxy(&test_device);
2234
2235    if (require_system && !system_loaded) {
2236        printf("devcoord: full system required, ignoring fallback drivers until /system is loaded\n");
2237    } else {
2238        driver_t* drv;
2239        while ((drv = list_remove_tail_type(&list_drivers_fallback, driver_t, node)) != nullptr) {
2240            list_add_tail(&list_drivers, &drv->node);
2241        }
2242    }
2243
2244    // Initial bind attempt for drivers enumerated at startup.
2245    driver_t* drv;
2246    list_for_every_entry(&list_drivers, drv, driver_t, node) {
2247        dc_bind_driver(drv);
2248    }
2249
2250    dc_running = true;
2251
2252    for (;;) {
2253        zx_status_t status;
2254        if (list_is_empty(&list_pending_work)) {
2255            status = port_dispatch(&dc_port, ZX_TIME_INFINITE, true);
2256        } else {
2257            status = port_dispatch(&dc_port, 0, true);
2258            if (status == ZX_ERR_TIMED_OUT) {
2259                process_work(list_remove_head_type(&list_pending_work, work_t, node));
2260                continue;
2261            }
2262        }
2263        if (status != ZX_OK) {
2264            log(ERROR, "devcoord: port dispatch ended: %d\n", status);
2265        }
2266    }
2267}
2268