1// Copyright 2016 The Fuchsia Authors
2//
3// Use of this source code is governed by a MIT-style
4// license that can be found in the LICENSE file or at
5// https://opensource.org/licenses/MIT
6
7#include <inttypes.h>
8
9#include <object/job_dispatcher.h>
10
11#include <err.h>
12
13#include <zircon/rights.h>
14#include <zircon/syscalls/policy.h>
15
16#include <fbl/alloc_checker.h>
17#include <fbl/array.h>
18#include <fbl/auto_lock.h>
19#include <fbl/mutex.h>
20
21#include <object/process_dispatcher.h>
22
23#include <platform.h>
24
25// The starting max_height value of the root job.
26static constexpr uint32_t kRootJobMaxHeight = 32;
27
28static constexpr char kRootJobName[] = "<superroot>";
29
// Specialization: when iterating over child jobs, the live-element count of
// |jobs_| is the cached |job_count_|. Caller must hold the dispatcher lock.
template <>
uint32_t JobDispatcher::ChildCountLocked<JobDispatcher>() const {
    return job_count_;
}
34
// Specialization: when iterating over child processes, the live-element count
// of |procs_| is the cached |process_count_|. Caller must hold the lock.
template <>
uint32_t JobDispatcher::ChildCountLocked<ProcessDispatcher>() const {
    return process_count_;
}
39
40// Calls the provided |zx_status_t func(fbl::RefPtr<DISPATCHER_TYPE>)|
41// function on all live elements of |children|, which must be one of |jobs_|
42// or |procs_|. Stops iterating early if |func| returns a value other than
43// ZX_OK, returning that value from this method. |lock_| must be held when
44// calling this method, and it will still be held while the callback is
45// called.
46//
47// The returned |LiveRefsArray| needs to be destructed when |lock_| is not
48// held anymore. The recommended pattern is:
49//
50//  LiveRefsArray refs;
51//  {
52//      Guard<fbl::Mutex> guard{get_lock()};
53//      refs = ForEachChildInLocked(...);
54//  }
55//
56template <typename T, typename Fn>
57JobDispatcher::LiveRefsArray JobDispatcher::ForEachChildInLocked(
58    T& children, zx_status_t* result, Fn func) {
59    // Convert child raw pointers into RefPtrs. This is tricky and requires
60    // special logic on the RefPtr class to handle a ref count that can be
61    // zero.
62    //
63    // The main requirement is that |lock_| is both controlling child
64    // list lookup and also making sure that the child destructor cannot
65    // make progress when doing so. In other words, when inspecting the
66    // |children| list we can be sure that a given child process or child
67    // job is either
68    //   - alive, with refcount > 0
69    //   - in destruction process but blocked, refcount == 0
70
71    const uint32_t count = ChildCountLocked<typename T::ValueType>();
72
73    if (!count) {
74        *result = ZX_OK;
75        return LiveRefsArray();
76    }
77
78    fbl::AllocChecker ac;
79    LiveRefsArray refs(new (&ac) fbl::RefPtr<Dispatcher>[count], count);
80    if (!ac.check()) {
81        *result = ZX_ERR_NO_MEMORY;
82        return LiveRefsArray();
83    }
84
85    size_t ix = 0;
86
87    for (auto& craw : children) {
88        auto cref = ::fbl::internal::MakeRefPtrUpgradeFromRaw(&craw, lock_.lock());
89        if (!cref)
90            continue;
91
92        *result = func(cref);
93        // |cref| might be the last reference at this point. If so,
94        // when we drop it in the next iteration the object dtor
95        // would be called here with the |get_lock()| held. To avoid that
96        // we keep the reference alive in the |refs| array and pass
97        // the responsibility of releasing them outside the lock to
98        // the caller.
99        refs[ix++] = fbl::move(cref);
100
101        if (*result != ZX_OK)
102            break;
103    }
104
105    return refs;
106}
107
108fbl::RefPtr<JobDispatcher> JobDispatcher::CreateRootJob() {
109    fbl::AllocChecker ac;
110    auto job = fbl::AdoptRef(new (&ac) JobDispatcher(0u, nullptr, kPolicyEmpty));
111    if (!ac.check())
112        return nullptr;
113    job->set_name(kRootJobName, sizeof(kRootJobName));
114    return job;
115}
116
117zx_status_t JobDispatcher::Create(uint32_t flags,
118                                  fbl::RefPtr<JobDispatcher> parent,
119                                  fbl::RefPtr<Dispatcher>* dispatcher,
120                                  zx_rights_t* rights) {
121    if (parent != nullptr && parent->max_height() == 0) {
122        // The parent job cannot have children.
123        return ZX_ERR_OUT_OF_RANGE;
124    }
125
126    fbl::AllocChecker ac;
127    fbl::RefPtr<JobDispatcher> job =
128        fbl::AdoptRef(new (&ac) JobDispatcher(flags, parent, parent->GetPolicy()));
129    if (!ac.check())
130        return ZX_ERR_NO_MEMORY;
131
132    if (!parent->AddChildJob(job)) {
133        return ZX_ERR_BAD_STATE;
134    }
135
136    *rights = ZX_DEFAULT_JOB_RIGHTS;
137    *dispatcher = fbl::move(job);
138    return ZX_OK;
139}
140
// Constructor. |flags| is currently unused. A null |parent| denotes the root
// job. |policy| is the initial policy cookie. Both ZX_JOB_NO_PROCESSES and
// ZX_JOB_NO_JOBS start asserted since a new job has no children yet.
JobDispatcher::JobDispatcher(uint32_t /*flags*/,
                             fbl::RefPtr<JobDispatcher> parent,
                             pol_cookie_t policy)
    : SoloDispatcher(ZX_JOB_NO_PROCESSES | ZX_JOB_NO_JOBS),
      parent_(fbl::move(parent)),
      // Each tree level has one less unit of depth available than its parent.
      max_height_(parent_ ? parent_->max_height() - 1 : kRootJobMaxHeight),
      state_(State::READY),
      process_count_(0u),
      job_count_(0u),
      policy_(policy) {

    // Set the initial job order, and try to make older jobs closer to
    // the root (both hierarchically and temporally) show up earlier
    // in enumeration.
    if (parent_ == nullptr) {
        // Root job is the most important.
        Guard<fbl::Mutex> guard{AllJobsLock::Get()};
        all_jobs_list_.push_back(this);
    } else {
        Guard<fbl::Mutex> parent_guard{parent_->get_lock()};
        JobDispatcher* neighbor;
        if (!parent_->jobs_.is_empty()) {
            // Our youngest sibling.
            //
            // IMPORTANT: We must hold the parent's lock during list insertion
            // to ensure that our sibling stays alive until we're done with it.
            // The sibling may be in its dtor right now, trying to remove itself
            // from parent_->jobs_ but blocked on parent_->get_lock(), and could be
            // freed if we released the lock.
            neighbor = &parent_->jobs_.back();

            // This can't be us: we aren't added to our parent's child list
            // until after construction.
            DEBUG_ASSERT(!dll_job_raw_.InContainer());
            DEBUG_ASSERT(neighbor != this);
        } else {
            // Our parent.
            neighbor = parent_.get();
        }

        // Make ourselves appear after our next-youngest neighbor.
        // Note: AllJobsLock is acquired while still holding the parent's lock,
        // establishing the parent-lock -> AllJobsLock ordering.
        Guard<fbl::Mutex> guard{AllJobsLock::Get()};
        all_jobs_list_.insert(all_jobs_list_.make_iterator(*neighbor), this);
    }
}
186
JobDispatcher::~JobDispatcher() {
    // Detach from the parent first. RemoveChildJob takes the parent's lock
    // and tolerates a job that was never added to the parent's list.
    if (parent_)
        parent_->RemoveChildJob(this);

    {
        // Unlink from the global enumeration list; every job is added to it
        // during construction, hence the assert.
        Guard<fbl::Mutex> guard{AllJobsLock::Get()};
        DEBUG_ASSERT(dll_all_jobs_.InContainer());
        all_jobs_list_.erase(*this);
    }
}
197
198zx_koid_t JobDispatcher::get_related_koid() const {
199    return parent_ ? parent_->get_koid() : 0u;
200}
201
// Adds |process| to this job's child list. Returns false if the job is no
// longer READY (being killed or dead) and so accepts no new children. Only a
// raw pointer is stored; the process unlinks itself via RemoveChildProcess.
bool JobDispatcher::AddChildProcess(const fbl::RefPtr<ProcessDispatcher>& process) {
    canary_.Assert();

    Guard<fbl::Mutex> guard{get_lock()};
    if (state_ != State::READY)
        return false;
    procs_.push_back(process.get());
    ++process_count_;
    // May deassert ZX_JOB_NO_PROCESSES now that a process exists.
    UpdateSignalsIncrementLocked();
    return true;
}
213
// Adds |job| to this job's child list. Returns false if the job is no longer
// READY (being killed or dead). Only a raw pointer is stored; the child
// unlinks itself via RemoveChildJob.
bool JobDispatcher::AddChildJob(const fbl::RefPtr<JobDispatcher>& job) {
    canary_.Assert();

    Guard<fbl::Mutex> guard{get_lock()};
    if (state_ != State::READY)
        return false;

    jobs_.push_back(job.get());
    ++job_count_;
    // May deassert ZX_JOB_NO_JOBS now that a child job exists.
    UpdateSignalsIncrementLocked();
    return true;
}
226
// Removes |process| from the child list if it is still linked; a process that
// was already removed is a no-op, because multiple teardown paths can race
// to call this.
void JobDispatcher::RemoveChildProcess(ProcessDispatcher* process) {
    canary_.Assert();

    Guard<fbl::Mutex> guard{get_lock()};
    // The process dispatcher can call us in its destructor, Kill(),
    // or RemoveThread().
    if (!ProcessDispatcher::JobListTraitsRaw::node_state(*process).InContainer())
        return;
    procs_.erase(*process);
    --process_count_;
    // May assert ZX_JOB_NO_PROCESSES, finish a pending kill, or halt the
    // system if the root job emptied; see UpdateSignalsDecrementLocked.
    UpdateSignalsDecrementLocked();
}
239
// Removes |job| from the child list if it is still linked; an already-removed
// job is a no-op (mirrors RemoveChildProcess).
void JobDispatcher::RemoveChildJob(JobDispatcher* job) {
    canary_.Assert();

    Guard<fbl::Mutex> guard{get_lock()};
    if (!JobDispatcher::ListTraitsRaw::node_state(*job).InContainer())
        return;
    jobs_.erase(*job);
    --job_count_;
    // May assert ZX_JOB_NO_JOBS, finish a pending kill, or halt the system
    // if the root job emptied; see UpdateSignalsDecrementLocked.
    UpdateSignalsDecrementLocked();
}
250
// Recomputes state after a child was removed: asserts ZX_JOB_NO_PROCESSES /
// ZX_JOB_NO_JOBS as the respective counts reach zero, completes a pending
// kill (KILLING -> DEAD) once both counts are zero, and halts the system
// when the root job runs out of children. Caller must hold the lock.
void JobDispatcher::UpdateSignalsDecrementLocked() {
    canary_.Assert();

    DEBUG_ASSERT(get_lock()->lock().IsHeld());

    // Removing jobs or processes.
    zx_signals_t set = 0u;
    if (process_count_ == 0u) {
        DEBUG_ASSERT(procs_.is_empty());
        set |= ZX_JOB_NO_PROCESSES;
    }
    if (job_count_ == 0u) {
        DEBUG_ASSERT(jobs_.is_empty());
        set |= ZX_JOB_NO_JOBS;
    }

    if ((job_count_ == 0) && (process_count_ == 0)) {
        // A fully drained job that was being killed is now dead.
        if (state_ == State::KILLING)
            state_ = State::DEAD;

        if (!parent_) {
            // There are no userspace process left. From here, there's
            // no particular context as to whether this was
            // intentional, or if a core devhost crashed due to a
            // bug. Either way, shut down the kernel.
            platform_halt(HALT_ACTION_HALT, HALT_REASON_SW_RESET);
        }
    }

    UpdateStateLocked(0u, set);
}
282
// Recomputes signals after a child was added: deasserts ZX_JOB_NO_PROCESSES /
// ZX_JOB_NO_JOBS on the transition from zero to one child of that kind.
// Caller must hold the lock.
void JobDispatcher::UpdateSignalsIncrementLocked() {
    canary_.Assert();

    DEBUG_ASSERT(get_lock()->lock().IsHeld());

    // Adding jobs or processes.
    zx_signals_t clear = 0u;
    if (process_count_ == 1u) {
        DEBUG_ASSERT(!procs_.is_empty());
        clear |= ZX_JOB_NO_PROCESSES;
    }
    if (job_count_ == 1u) {
        DEBUG_ASSERT(!jobs_.is_empty());
        clear |= ZX_JOB_NO_JOBS;
    }
    UpdateStateLocked(clear, 0u);
}
300
301pol_cookie_t JobDispatcher::GetPolicy() {
302    Guard<fbl::Mutex> guard{get_lock()};
303    return policy_;
304}
305
// Kills this job: marks it KILLING and recursively kills every child job and
// process. The transition to DEAD happens later, when the last child removes
// itself (see UpdateSignalsDecrementLocked). Idempotent: a job not in READY
// state returns immediately.
void JobDispatcher::Kill() {
    canary_.Assert();

    JobList jobs_to_kill;
    ProcessList procs_to_kill;

    // Declared before the lock scope so the gathered strong refs are released
    // only after the lock is dropped (see ForEachChildInLocked's contract).
    LiveRefsArray jobs_refs;
    LiveRefsArray proc_refs;

    {
        Guard<fbl::Mutex> guard{get_lock()};
        if (state_ != State::READY)
            return;

        // Short circuit if there is nothing to do. Notice |state_|
        // does not change.
        if ((job_count_ == 0u) && (process_count_ == 0u))
            return;

        state_ = State::KILLING;
        zx_status_t result;

        // Safely gather refs to the children.
        jobs_refs = ForEachChildInLocked(jobs_, &result, [&](fbl::RefPtr<JobDispatcher> job) {
            jobs_to_kill.push_front(fbl::move(job));
            return ZX_OK;
        });
        proc_refs = ForEachChildInLocked(procs_, &result, [&](fbl::RefPtr<ProcessDispatcher> proc) {
            procs_to_kill.push_front(fbl::move(proc));
            return ZX_OK;
        });
    }

    // Since we kill the child jobs first we have a depth-first massacre.
    while (!jobs_to_kill.is_empty()) {
        // TODO(cpu): This recursive call can overflow the stack.
        jobs_to_kill.pop_front()->Kill();
    }

    while (!procs_to_kill.is_empty()) {
        procs_to_kill.pop_front()->Kill();
    }
}
349
350zx_status_t JobDispatcher::SetPolicy(
351    uint32_t mode, const zx_policy_basic* in_policy, size_t policy_count) {
352    // Can't set policy when there are active processes or jobs.
353    Guard<fbl::Mutex> guard{get_lock()};
354
355    if (!procs_.is_empty() || !jobs_.is_empty())
356        return ZX_ERR_BAD_STATE;
357
358    pol_cookie_t new_policy;
359    auto status = GetSystemPolicyManager()->AddPolicy(
360        mode, policy_, in_policy, policy_count, &new_policy);
361
362    if (status != ZX_OK)
363        return status;
364
365    policy_ = new_policy;
366    return ZX_OK;
367}
368
// Invokes |je|->OnProcess() for each child process and |je|->OnJob() for each
// child job, recursing into child jobs when |recurse| is set. Stops early and
// returns false if any callback returns false. Callbacks run with this job's
// lock held.
bool JobDispatcher::EnumerateChildren(JobEnumerator* je, bool recurse) {
    canary_.Assert();

    // Declared before the lock scope so the gathered strong refs are released
    // only after the lock is dropped (see ForEachChildInLocked's contract).
    LiveRefsArray jobs_refs;
    LiveRefsArray proc_refs;

    zx_status_t result = ZX_OK;

    {
        Guard<fbl::Mutex> guard{get_lock()};

        proc_refs = ForEachChildInLocked(
            procs_, &result, [&](fbl::RefPtr<ProcessDispatcher> proc) {
                return je->OnProcess(proc.get()) ? ZX_OK : ZX_ERR_STOP;
            });
        if (result != ZX_OK) {
            // Early-out: |guard| is released first, then |proc_refs| may drop
            // the last references safely outside the lock.
            return false;
        }

        jobs_refs = ForEachChildInLocked(jobs_, &result, [&](fbl::RefPtr<JobDispatcher> job) {
            if (!je->OnJob(job.get())) {
                return ZX_ERR_STOP;
            }
            if (recurse) {
                // TODO(kulakowski): This recursive call can overflow the stack.
                return job->EnumerateChildren(je, /* recurse */ true)
                           ? ZX_OK
                           : ZX_ERR_STOP;
            }
            return ZX_OK;
        });
    }

    return result == ZX_OK;
}
404
// Searches this job's direct child processes (non-recursive) for one with
// the given koid. Returns null if not found.
fbl::RefPtr<ProcessDispatcher>
JobDispatcher::LookupProcessById(zx_koid_t koid) {
    canary_.Assert();

    // Keeps gathered child refs alive until after the lock is dropped.
    LiveRefsArray proc_refs;

    fbl::RefPtr<ProcessDispatcher> found_proc;
    {
        Guard<fbl::Mutex> guard{get_lock()};
        zx_status_t result;

        proc_refs = ForEachChildInLocked(procs_, &result, [&](fbl::RefPtr<ProcessDispatcher> proc) {
            if (proc->get_koid() == koid) {
                found_proc = fbl::move(proc);
                return ZX_ERR_STOP; // Stop iterating; not an error.
            }
            return ZX_OK;
        });
    }
    return found_proc; // Null if not found.
}
426
// Searches this job's direct child jobs (non-recursive) for one with the
// given koid. Returns null if not found.
fbl::RefPtr<JobDispatcher>
JobDispatcher::LookupJobById(zx_koid_t koid) {
    canary_.Assert();

    // Keeps gathered child refs alive until after the lock is dropped.
    LiveRefsArray jobs_refs;

    fbl::RefPtr<JobDispatcher> found_job;
    {
        Guard<fbl::Mutex> guard{get_lock()};
        zx_status_t result;

        jobs_refs = ForEachChildInLocked(jobs_, &result, [&](fbl::RefPtr<JobDispatcher> job) {
            if (job->get_koid() == koid) {
                found_job = fbl::move(job);
                return ZX_ERR_STOP; // Stop iterating; not an error.
            }
            return ZX_OK;
        });
    }
    return found_job; // Null if not found.
}
448
// Copies the job's name into |out_name| (a ZX_MAX_NAME_LEN-byte buffer).
void JobDispatcher::get_name(char out_name[ZX_MAX_NAME_LEN]) const {
    canary_.Assert();

    name_.get(ZX_MAX_NAME_LEN, out_name);
}
454
// Sets the job's name from |name|/|len|; forwards the status from the
// underlying name container's setter.
zx_status_t JobDispatcher::set_name(const char* name, size_t len) {
    canary_.Assert();

    return name_.set(name, len);
}
460
// Global list of all jobs, used for enumeration. Older/closer-to-root jobs
// appear earlier (see the constructor). Guarded by AllJobsLock.
JobDispatcher::AllJobsList JobDispatcher::all_jobs_list_;
463
// Binds |eport| as this job's exception port — the debugger slot for
// JOB_DEBUGGER-typed ports, the regular slot for JOB-typed ports. Returns
// ZX_ERR_ALREADY_BOUND if a port is already bound in that slot.
zx_status_t JobDispatcher::SetExceptionPort(fbl::RefPtr<ExceptionPort> eport) {
    canary_.Assert();
    bool debugger = false;
    switch (eport->type()) {
    case ExceptionPort::Type::JOB_DEBUGGER:
        debugger = true;
        break;
    case ExceptionPort::Type::JOB:
        break;
    default:
        // Callers should only pass job-typed ports. Debug builds assert;
        // release builds fall through and treat the port as a plain JOB port.
        DEBUG_ASSERT_MSG(false, "unexpected port type: %d",
                         static_cast<int>(eport->type()));
        break;
    }

    Guard<fbl::Mutex> guard{get_lock()};
    if (debugger) {
        if (debugger_exception_port_)
            return ZX_ERR_ALREADY_BOUND;
        debugger_exception_port_ = fbl::move(eport);
    } else {
        if (exception_port_)
            return ZX_ERR_ALREADY_BOUND;
        exception_port_ = fbl::move(eport);
    }
    return ZX_OK;
}
491
492class OnExceptionPortRemovalEnumerator final : public JobEnumerator {
493public:
494    OnExceptionPortRemovalEnumerator(fbl::RefPtr<ExceptionPort> eport)
495        : eport_(fbl::move(eport)) {}
496    OnExceptionPortRemovalEnumerator(const OnExceptionPortRemovalEnumerator&) = delete;
497
498private:
499    bool OnProcess(ProcessDispatcher* process) override {
500        process->OnExceptionPortRemoval(eport_);
501        // Keep looking.
502        return true;
503    }
504
505    fbl::RefPtr<ExceptionPort> eport_;
506};
507
// Unbinds this job's exception port (the debugger port when |debugger| is
// set, the regular one otherwise). Returns false if no such port was bound.
// Unless |quietly| is set, every process in the subtree is notified of the
// removal afterwards.
bool JobDispatcher::ResetExceptionPort(bool debugger, bool quietly) {
    canary_.Assert();

    fbl::RefPtr<ExceptionPort> eport;
    {
        Guard<fbl::Mutex> lock{get_lock()};
        // Swap the bound port (if any) into the local; leaves the member null.
        if (debugger) {
            debugger_exception_port_.swap(eport);
        } else {
            exception_port_.swap(eport);
        }
        if (eport == nullptr) {
            // Attempted to unbind when no exception port is bound.
            return false;
        }
        // This method must guarantee that no caller will return until
        // OnTargetUnbind has been called on the port-to-unbind.
        // This becomes important when a manual unbind races with a
        // PortDispatcher::on_zero_handles auto-unbind.
        //
        // If OnTargetUnbind were called outside of the lock, it would lead to
        // a race (for threads A and B):
        //
        //   A: Calls ResetExceptionPort; acquires the lock
        //   A: Sees a non-null exception_port_, swaps it into the eport local.
        //      exception_port_ is now null.
        //   A: Releases the lock
        //
        //   B: Calls ResetExceptionPort; acquires the lock
        //   B: Sees a null exception_port_ and returns. But OnTargetUnbind()
        //      hasn't yet been called for the port.
        //
        // So, call it before releasing the lock.
        eport->OnTargetUnbind();
    }

    if (!quietly) {
        // Recursively tell every process in the subtree about the removal.
        OnExceptionPortRemovalEnumerator remover(eport);
        if (!EnumerateChildren(&remover, true)) {
            DEBUG_ASSERT(false);
        }
    }
    return true;
}
552
553fbl::RefPtr<ExceptionPort> JobDispatcher::exception_port() {
554    Guard<fbl::Mutex> lock{get_lock()};
555    return exception_port_;
556}
557
// Returns the currently bound debugger exception port; may be null.
fbl::RefPtr<ExceptionPort> JobDispatcher::debugger_exception_port() {
    Guard<fbl::Mutex> guard{get_lock()};
    return debugger_exception_port_;
}
562