// Copyright 2017 The Fuchsia Authors
// Copyright (c) 2008-2015 Travis Geiselbrecht
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include <kernel/sched.h>

#include <assert.h>
#include <debug.h>
#include <err.h>
#include <inttypes.h>
#include <list.h>
#include <printf.h>
#include <string.h>
#include <trace.h>

#include <kernel/mp.h>
#include <kernel/percpu.h>
#include <kernel/thread.h>
#include <lib/ktrace.h>
#include <platform.h>
#include <target.h>
#include <vm/vm.h>
#include <zircon/time.h>
#include <zircon/types.h>

// disable priority boosting
#define NO_BOOST 0

#define MAX_PRIORITY_ADJ 4 // +/- priority levels from the base priority

// ktraces just local to this file
#define LOCAL_KTRACE 0

#if LOCAL_KTRACE
#define LOCAL_KTRACE0(probe) ktrace_probe0(probe)
#define LOCAL_KTRACE2(probe, x, y) ktrace_probe2(probe, x, y)
#else
#define LOCAL_KTRACE0(probe)
#define LOCAL_KTRACE2(probe, x, y)
#endif

#define LOCAL_TRACE 0

#define DEBUG_THREAD_CONTEXT_SWITCH 0

#define TRACE_CONTEXT_SWITCH(str, x...)  \
    do {                                 \
        if (DEBUG_THREAD_CONTEXT_SWITCH) \
            printf("CS " str, ##x);      \
    } while (0)

// threads get 10ms to run before they use up their time slice and the scheduler is invoked
#define THREAD_INITIAL_TIME_SLICE ZX_MSEC(10)

static bool local_migrate_if_needed(thread_t* curr_thread);

// compute the effective priority of a thread
static void compute_effec_priority(thread_t* t) {
    int ep = t->base_priority + t->priority_boost;
    if (t->inherited_priority > ep) {
        ep = t->inherited_priority;
    }

    DEBUG_ASSERT(ep >= LOWEST_PRIORITY && ep <= HIGHEST_PRIORITY);

    t->effec_priority = ep;
}

// boost the priority of the thread by +1
static void boost_thread(thread_t* t) {
    if (NO_BOOST) {
        return;
    }

    if (unlikely(thread_is_real_time_or_idle(t))) {
        return;
    }

    if (t->priority_boost < MAX_PRIORITY_ADJ &&
        likely((t->base_priority + t->priority_boost) < HIGHEST_PRIORITY)) {
        t->priority_boost++;
        compute_effec_priority(t);
    }
}

// deboost the priority of the thread by -1.
// If deboosting because the thread is using up all of its time slice,
// then allow the boost to go negative, otherwise only deboost to 0.
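// For example, with MAX_PRIORITY_ADJ of 4 a thread at base priority 16 that keeps
// expiring its quantum can sink as low as effective priority 12, while a thread that
// merely yields or blocks never drops below its base priority of 16.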
static void deboost_thread(thread_t* t, bool quantum_expiration) {
    if (NO_BOOST) {
        return;
    }

    if (unlikely(thread_is_real_time_or_idle(t))) {
        return;
    }

    int boost_floor;
    if (quantum_expiration) {
        // deboost into negative boost
        boost_floor = -MAX_PRIORITY_ADJ;

        // make sure we dont deboost a thread too far
        if (unlikely(t->base_priority + boost_floor < LOWEST_PRIORITY)) {
            boost_floor = LOWEST_PRIORITY - t->base_priority;
        }
    } else {
        // otherwise only deboost to 0
        boost_floor = 0;
    }

    // if we're already bottomed out or below bottomed out, leave it alone
    if (t->priority_boost <= boost_floor) {
        return;
    }

    // drop a level
    t->priority_boost--;
    compute_effec_priority(t);
}

// pick a 'random' cpu out of the passed in mask of cpus
static cpu_mask_t rand_cpu(cpu_mask_t mask) {
    if (unlikely(mask == 0)) {
        return 0;
    }

    // check that the mask passed in has at least one bit set in the active mask
    cpu_mask_t active = mp_get_active_mask();
    mask &= active;
    if (unlikely(mask == 0)) {
        return 0;
    }

    // compute the highest cpu in the mask
    cpu_num_t highest_cpu = highest_cpu_set(mask);

    // not very random, round robins a bit through the mask until it gets a hit
    for (;;) {
        // protected by THREAD_LOCK, safe to use non atomically
        static uint rot = 0;

        if (++rot > highest_cpu) {
            rot = 0;
        }

        if ((1u << rot) & mask) {
            return (1u << rot);
        }
    }
}

// find a cpu to wake up
static cpu_mask_t find_cpu_mask(thread_t* t) TA_REQ(thread_lock) {
    // get the last cpu the thread ran on
    cpu_mask_t last_ran_cpu_mask = cpu_num_to_mask(t->last_cpu);

    // the current cpu
    cpu_mask_t curr_cpu_mask = cpu_num_to_mask(arch_curr_cpu_num());

    // the thread's affinity mask
    cpu_mask_t cpu_affinity = t->cpu_affinity;

    LTRACEF_LEVEL(2, "last %#x curr %#x aff %#x name %s\n",
                  last_ran_cpu_mask, curr_cpu_mask, cpu_affinity, t->name);

    // get a list of idle cpus and mask off the ones that aren't in our affinity mask
    cpu_mask_t idle_cpu_mask = mp_get_idle_mask();
    cpu_mask_t active_cpu_mask = mp_get_active_mask();
    idle_cpu_mask &= cpu_affinity;
    if (idle_cpu_mask != 0) {
        if (idle_cpu_mask & curr_cpu_mask) {
            // the current cpu is idle and within our affinity mask, so run it here
            return curr_cpu_mask;
        }

        if (last_ran_cpu_mask & idle_cpu_mask) {
            DEBUG_ASSERT(last_ran_cpu_mask & mp_get_active_mask());
            // the last core it ran on is idle and isn't the current cpu
            return last_ran_cpu_mask;
        }

        // pick an idle_cpu
        DEBUG_ASSERT((idle_cpu_mask & mp_get_active_mask()) == idle_cpu_mask);
        return rand_cpu(idle_cpu_mask);
    }

    // no idle cpus in our affinity mask

    // if the last cpu it ran on is in the affinity mask and not the current cpu, pick that
    if ((last_ran_cpu_mask & cpu_affinity & active_cpu_mask) &&
        last_ran_cpu_mask != curr_cpu_mask) {
        return last_ran_cpu_mask;
    }

    // fall back to picking a cpu out of the affinity mask, preferring something other
    // than the local cpu.
    // the affinity mask hard pins the thread to the cpus in the mask, so it's not possible
    // to pick a cpu outside of that list.
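    // note rand_cpu() below can still return 0 if none of the remaining cpus are
    // currently active, in which case the local cpu is used as the last resort.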
    cpu_mask_t mask = cpu_affinity & ~(curr_cpu_mask);
    if (mask == 0) {
        return curr_cpu_mask; // local cpu is the only choice
    }

    mask = rand_cpu(mask);
    if (mask == 0) {
        return curr_cpu_mask; // local cpu is the only choice
    }

    DEBUG_ASSERT((mask & mp_get_active_mask()) == mask);
    return mask;
}

// run queue manipulation
static void insert_in_run_queue_head(cpu_num_t cpu, thread_t* t) TA_REQ(thread_lock) {
    DEBUG_ASSERT(!list_in_list(&t->queue_node));

    list_add_head(&percpu[cpu].run_queue[t->effec_priority], &t->queue_node);
    percpu[cpu].run_queue_bitmap |= (1u << t->effec_priority);

    // mark the cpu as busy since the run queue now has at least one item in it
    mp_set_cpu_busy(cpu);
}

static void insert_in_run_queue_tail(cpu_num_t cpu, thread_t* t) TA_REQ(thread_lock) {
    DEBUG_ASSERT(!list_in_list(&t->queue_node));

    list_add_tail(&percpu[cpu].run_queue[t->effec_priority], &t->queue_node);
    percpu[cpu].run_queue_bitmap |= (1u << t->effec_priority);

    // mark the cpu as busy since the run queue now has at least one item in it
    mp_set_cpu_busy(cpu);
}

// remove the thread from the run queue it's in
static void remove_from_run_queue(thread_t* t, int prio_queue) TA_REQ(thread_lock) {
    DEBUG_ASSERT(t->state == THREAD_READY);
    DEBUG_ASSERT(is_valid_cpu_num(t->curr_cpu));

    list_delete(&t->queue_node);

    // clear the old cpu's queue bitmap if that was the last entry
    struct percpu* c = &percpu[t->curr_cpu];
    if (list_is_empty(&c->run_queue[prio_queue])) {
        c->run_queue_bitmap &= ~(1u << prio_queue);
    }
}

// using the per cpu run queue bitmap, find the highest populated queue
// (the bitmap has NUM_PRIORITIES significant bits; subtracting the unused high bits
// from the clz result maps the topmost set bit back to a priority index)
static uint highest_run_queue(const struct percpu* c) TA_REQ(thread_lock) {
    return HIGHEST_PRIORITY - __builtin_clz(c->run_queue_bitmap) -
           (sizeof(c->run_queue_bitmap) * CHAR_BIT - NUM_PRIORITIES);
}

static thread_t* sched_get_top_thread(cpu_num_t cpu) TA_REQ(thread_lock) {
    // pop the head of the highest priority queue with any threads
    // queued up on the passed in cpu.
    struct percpu* c = &percpu[cpu];
    if (likely(c->run_queue_bitmap)) {
        uint highest_queue = highest_run_queue(c);

        thread_t* newthread = list_remove_head_type(&c->run_queue[highest_queue], thread_t, queue_node);

        DEBUG_ASSERT(newthread);
        DEBUG_ASSERT_MSG(newthread->cpu_affinity & cpu_num_to_mask(cpu),
                         "thread %p name %s, aff %#x cpu %u\n", newthread, newthread->name,
                         newthread->cpu_affinity, cpu);
        DEBUG_ASSERT(newthread->curr_cpu == cpu);

        if (list_is_empty(&c->run_queue[highest_queue])) {
            c->run_queue_bitmap &= ~(1u << highest_queue);
        }

        LOCAL_KTRACE2("sched_get_top", newthread->priority_boost, newthread->base_priority);

        return newthread;
    }

    // no threads to run, select the idle thread for this cpu
    return &c->idle_thread;
}

void sched_init_thread(thread_t* t, int priority) {
    t->base_priority = priority;
    t->priority_boost = 0;
    t->inherited_priority = -1;
    compute_effec_priority(t);
}

void sched_block() {
    DEBUG_ASSERT(spin_lock_held(&thread_lock));

    __UNUSED thread_t* current_thread = get_current_thread();

    DEBUG_ASSERT(current_thread->magic == THREAD_MAGIC);
    DEBUG_ASSERT(current_thread->state != THREAD_RUNNING);

    LOCAL_KTRACE0("sched_block");

    // we are blocking on something. the blocking code should have already stuck us on a queue
    sched_resched_internal();
}

// find a cpu to run the thread on, put it in the run queue for that cpu, and accumulate a list
// of cpus we'll need to reschedule, including the local cpu.
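// A thread that still has some of its time slice left is queued at the head of its
// priority queue so it finishes its quantum before freshly queued threads of the same
// priority; a thread with an expired slice goes to the tail.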
static void find_cpu_and_insert(thread_t* t, bool* local_resched,
                                cpu_mask_t* accum_cpu_mask) TA_REQ(thread_lock) {
    // find a core to run it on
    cpu_mask_t cpu = find_cpu_mask(t);
    cpu_num_t cpu_num;

    DEBUG_ASSERT(cpu != 0);

    cpu_num = lowest_cpu_set(cpu);
    if (cpu_num == arch_curr_cpu_num()) {
        *local_resched = true;
    } else {
        *accum_cpu_mask |= cpu_num_to_mask(cpu_num);
    }

    t->curr_cpu = cpu_num;
    if (t->remaining_time_slice > 0) {
        insert_in_run_queue_head(cpu_num, t);
    } else {
        insert_in_run_queue_tail(cpu_num, t);
    }
}

bool sched_unblock(thread_t* t) {
    DEBUG_ASSERT(spin_lock_held(&thread_lock));
    DEBUG_ASSERT(t->magic == THREAD_MAGIC);

    LOCAL_KTRACE0("sched_unblock");

    // thread is being woken up, boost its priority
    boost_thread(t);

    // stuff the new thread in the run queue
    t->state = THREAD_READY;

    bool local_resched = false;
    cpu_mask_t mask = 0;
    find_cpu_and_insert(t, &local_resched, &mask);

    if (mask) {
        mp_reschedule(mask, 0);
    }
    return local_resched;
}

bool sched_unblock_list(struct list_node* list) {
    DEBUG_ASSERT(list);
    DEBUG_ASSERT(spin_lock_held(&thread_lock));

    LOCAL_KTRACE0("sched_unblock_list");

    // pop the list of threads and shove into the scheduler
    bool local_resched = false;
    cpu_mask_t accum_cpu_mask = 0;
    thread_t* t;
    while ((t = list_remove_tail_type(list, thread_t, queue_node))) {
        DEBUG_ASSERT(t->magic == THREAD_MAGIC);
        DEBUG_ASSERT(!thread_is_idle(t));

        // thread is being woken up, boost its priority
        boost_thread(t);

        // stuff the new thread in the run queue
        t->state = THREAD_READY;
        find_cpu_and_insert(t, &local_resched, &accum_cpu_mask);
    }

    if (accum_cpu_mask) {
        mp_reschedule(accum_cpu_mask, 0);
    }

    return local_resched;
}

// handle the special case of resuming a newly created idle thread
void sched_unblock_idle(thread_t* t) {
    DEBUG_ASSERT(spin_lock_held(&thread_lock));

    DEBUG_ASSERT(thread_is_idle(t));
    DEBUG_ASSERT(t->cpu_affinity && (t->cpu_affinity & (t->cpu_affinity - 1)) == 0);

    // idle thread is special case, just jam it into the cpu's run queue in the thread's
    // affinity mask and mark it ready.
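    // the affinity assert above guarantees exactly one bit is set, so lowest_cpu_set()
    // resolves to that single cpu.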
    t->state = THREAD_READY;
    cpu_num_t cpu = lowest_cpu_set(t->cpu_affinity);
    t->curr_cpu = cpu;
    insert_in_run_queue_head(cpu, t);
}

// the thread is voluntarily giving up its time slice
void sched_yield() {
    DEBUG_ASSERT(spin_lock_held(&thread_lock));

    thread_t* current_thread = get_current_thread();

    DEBUG_ASSERT(!thread_is_idle(current_thread));

    LOCAL_KTRACE0("sched_yield");

    // consume the rest of the time slice, deboost ourself, and go to the end of a queue
    current_thread->remaining_time_slice = 0;
    deboost_thread(current_thread, false);

    current_thread->state = THREAD_READY;

    if (local_migrate_if_needed(current_thread)) {
        return;
    }

    insert_in_run_queue_tail(arch_curr_cpu_num(), current_thread);

    sched_resched_internal();
}

// the current thread is being preempted from interrupt context
void sched_preempt() {
    DEBUG_ASSERT(spin_lock_held(&thread_lock));

    thread_t* current_thread = get_current_thread();
    uint curr_cpu = arch_curr_cpu_num();

    DEBUG_ASSERT(current_thread->curr_cpu == curr_cpu);
    DEBUG_ASSERT(current_thread->last_cpu == current_thread->curr_cpu);

    LOCAL_KTRACE0("sched_preempt");

    current_thread->state = THREAD_READY;

    // idle thread doesn't go in the run queue
    if (likely(!thread_is_idle(current_thread))) {
        if (current_thread->remaining_time_slice <= 0) {
            // if we're out of quantum, deboost the thread and put it at the tail of a queue
            deboost_thread(current_thread, true);
        }

        if (local_migrate_if_needed(current_thread)) {
            return;
        }

        if (current_thread->remaining_time_slice > 0) {
            insert_in_run_queue_head(curr_cpu, current_thread);
        } else {
            insert_in_run_queue_tail(curr_cpu, current_thread);
        }
    }

    sched_resched_internal();
}

// the current thread is voluntarily reevaluating the scheduler on the current cpu
void sched_reschedule() {
    DEBUG_ASSERT(spin_lock_held(&thread_lock));

    thread_t* current_thread = get_current_thread();
    uint curr_cpu = arch_curr_cpu_num();

    if (current_thread->disable_counts != 0) {
        current_thread->preempt_pending = true;
        return;
    }

    DEBUG_ASSERT(current_thread->curr_cpu == curr_cpu);
    DEBUG_ASSERT(current_thread->last_cpu == current_thread->curr_cpu);

    LOCAL_KTRACE0("sched_reschedule");

    current_thread->state = THREAD_READY;

    // idle thread doesn't go in the run queue
    if (likely(!thread_is_idle(current_thread))) {
        // deboost the current thread
        deboost_thread(current_thread, false);

        if (local_migrate_if_needed(current_thread)) {
            return;
        }

        if (current_thread->remaining_time_slice > 0) {
            insert_in_run_queue_head(curr_cpu, current_thread);
        } else {
            insert_in_run_queue_tail(curr_cpu, current_thread);
        }
    }

    sched_resched_internal();
}

// migrate the current thread to a new cpu and locally reschedule to seal the deal
static void migrate_current_thread(thread_t* current_thread) TA_REQ(thread_lock) {
    bool local_resched = false;
    cpu_mask_t accum_cpu_mask = 0;

    // current thread, so just shove ourself into another cpu's queue and reschedule locally
    current_thread->state = THREAD_READY;
    find_cpu_and_insert(current_thread, &local_resched, &accum_cpu_mask);
    if (accum_cpu_mask) {
        mp_reschedule(accum_cpu_mask, 0);
    }
    sched_resched_internal();
}

// migrate all non-pinned threads assigned to |old_cpu| to other queues
//
// must be called on |old_cpu|
void sched_transition_off_cpu(cpu_num_t old_cpu) {
    DEBUG_ASSERT(spin_lock_held(&thread_lock));
    DEBUG_ASSERT(old_cpu == arch_curr_cpu_num());

    // Ensure we do not get scheduled on anymore.
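    // (once inactive, rand_cpu() and the last-ran checks in find_cpu_mask() will no
    // longer pick this cpu for the threads we are about to move)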
    mp_set_curr_cpu_active(false);

    thread_t* t;
    bool local_resched = false;
    cpu_mask_t accum_cpu_mask = 0;
    cpu_mask_t pinned_mask = cpu_num_to_mask(old_cpu);
    list_node_t pinned_threads = LIST_INITIAL_VALUE(pinned_threads);
    while (!thread_is_idle(t = sched_get_top_thread(old_cpu))) {
        // Threads pinned to old_cpu can't run anywhere else, so put them
        // into a temporary list and deal with them later.
        if (t->cpu_affinity != pinned_mask) {
            find_cpu_and_insert(t, &local_resched, &accum_cpu_mask);
            DEBUG_ASSERT(!local_resched);
        } else {
            DEBUG_ASSERT(!list_in_list(&t->queue_node));
            list_add_head(&pinned_threads, &t->queue_node);
        }
    }

    // Put pinned threads back on old_cpu's queue.
    while ((t = list_remove_head_type(&pinned_threads, thread_t, queue_node)) != NULL) {
        insert_in_run_queue_head(old_cpu, t);
    }

    if (accum_cpu_mask) {
        mp_reschedule(accum_cpu_mask, 0);
    }
}

// check to see if the current thread needs to migrate to a new core
// the passed argument must be the current thread and must already be pushed into the READY state
static bool local_migrate_if_needed(thread_t* curr_thread) TA_REQ(thread_lock) {
    DEBUG_ASSERT(curr_thread == get_current_thread());
    DEBUG_ASSERT(curr_thread->state == THREAD_READY);

    // if the affinity mask does not include the current cpu, migrate us right now
    if (unlikely((curr_thread->cpu_affinity & cpu_num_to_mask(curr_thread->curr_cpu)) == 0)) {
        migrate_current_thread(curr_thread);
        return true;
    }
    return false;
}

// potentially migrate a thread to a new core based on the affinity mask on the thread. If it's
// running or in a scheduler queue, handle it.
void sched_migrate(thread_t* t) {
    DEBUG_ASSERT(spin_lock_held(&thread_lock));

    bool local_resched = false;
    cpu_mask_t accum_cpu_mask = 0;
    switch (t->state) {
    case THREAD_RUNNING:
        // see if we need to migrate
        if (t->cpu_affinity & cpu_num_to_mask(t->curr_cpu)) {
            // it's running and the new mask contains the core it's already running on, nothing to do.
            //TRACEF("t %p nomigrate\n", t);
            return;
        }

        // we need to migrate
        if (t == get_current_thread()) {
            // current thread, so just shove ourself into another cpu's queue and reschedule locally
            migrate_current_thread(t);
            return;
        } else {
            // running on another cpu, interrupt and let sched_preempt() sort it out
            accum_cpu_mask = cpu_num_to_mask(t->curr_cpu);
        }
        break;
    case THREAD_READY:
        if (t->cpu_affinity & cpu_num_to_mask(t->curr_cpu)) {
            // it's ready and the new mask contains the core it's already waiting on, nothing to do.
//TRACEF("t %p nomigrate\n", t); return; } // it's sitting in a run queue somewhere, so pull it out of that one and find a new home DEBUG_ASSERT_MSG(list_in_list(&t->queue_node), "thread %p name %s curr_cpu %u\n", t, t->name, t->curr_cpu); remove_from_run_queue(t, t->effec_priority); find_cpu_and_insert(t, &local_resched, &accum_cpu_mask); break; default: // the other states do not matter, exit return; } // send some ipis based on the previous code if (accum_cpu_mask) { mp_reschedule(accum_cpu_mask, 0); } if (local_resched) { sched_reschedule(); } } // the effective priority of a thread has changed, do what is necessary to move the thread // from different queues and inform us if we need to reschedule static void sched_priority_changed(thread_t* t, int old_prio, bool* local_resched, cpu_mask_t* accum_cpu_mask) TA_REQ(thread_lock) { switch (t->state) { case THREAD_RUNNING: if (t->effec_priority < old_prio) { // we're currently running and dropped our effective priority, might want to resched if (t == get_current_thread()) { *local_resched = true; } else { *accum_cpu_mask |= cpu_num_to_mask(t->curr_cpu); } } break; case THREAD_READY: // it's sitting in a run queue somewhere, remove and add back to the proper queue on that cpu DEBUG_ASSERT_MSG(list_in_list(&t->queue_node), "thread %p name %s curr_cpu %u\n", t, t->name, t->curr_cpu); remove_from_run_queue(t, old_prio); // insert ourself into the new queue if (t->effec_priority > old_prio) { insert_in_run_queue_head(t->curr_cpu, t); // we may now be higher priority than the current thread on this cpu, reschedule if (t->curr_cpu == arch_curr_cpu_num()) { *local_resched = true; } else { *accum_cpu_mask |= cpu_num_to_mask(t->curr_cpu); } } else { insert_in_run_queue_tail(t->curr_cpu, t); } break; case THREAD_BLOCKED: // it's blocked on something, sitting in a wait queue, so we may need to move it around // within the wait queue. // note it's possible to be blocked but not in a wait queue if the thread is in transition // from blocked to running if (t->blocking_wait_queue) { wait_queue_priority_changed(t, old_prio); } break; default: // the other states do not matter, exit return; } } // set the priority to the higher value of what it was before and the newly inherited value // pri < 0 disables priority inheritance and goes back to the naturally computed values void sched_inherit_priority(thread_t* t, int pri, bool* local_resched) { DEBUG_ASSERT(spin_lock_held(&thread_lock)); if (pri > HIGHEST_PRIORITY) { pri = HIGHEST_PRIORITY; } // if we're setting it to something real and it's less than the current, skip if (pri >= 0 && pri <= t->inherited_priority) { return; } // adjust the priority and remember the old value t->inherited_priority = pri; int old_ep = t->effec_priority; compute_effec_priority(t); if (old_ep == t->effec_priority) { // same effective priority, nothing to do return; } // see if we need to do something based on the state of the thread cpu_mask_t accum_cpu_mask = 0; sched_priority_changed(t, old_ep, local_resched, &accum_cpu_mask); // send some ipis based on the previous code if (accum_cpu_mask) { mp_reschedule(accum_cpu_mask, 0); } } // changes the thread's base priority and if the re-computed effective priority changed // then the thread is moved to the proper queue on the same processor and a re-schedule // might be issued. 
void sched_change_priority(thread_t* t, int pri) {
    DEBUG_ASSERT(spin_lock_held(&thread_lock));

    if (unlikely(t->state == THREAD_DEATH)) {
        return;
    }

    if (pri > HIGHEST_PRIORITY) {
        pri = HIGHEST_PRIORITY;
    }

    int old_ep = t->effec_priority;
    t->base_priority = pri;
    t->priority_boost = 0;
    compute_effec_priority(t);
    if (old_ep == t->effec_priority) {
        // No effective change so we exit. The boost has reset but that's ok.
        return;
    }

    cpu_mask_t accum_cpu_mask = 0;
    bool local_resched = false;

    // see if we need to do something based on the state of the thread.
    sched_priority_changed(t, old_ep, &local_resched, &accum_cpu_mask);

    // send some ipis based on the previous code
    if (accum_cpu_mask) {
        mp_reschedule(accum_cpu_mask, 0);
    }
    if (local_resched) {
        sched_reschedule();
    }
}

// preemption timer that is set whenever a thread is scheduled
void sched_preempt_timer_tick(zx_time_t now) {
    // if the preemption timer went off on the idle or a real time thread, ignore it
    thread_t* current_thread = get_current_thread();
    if (unlikely(thread_is_real_time_or_idle(current_thread))) {
        return;
    }

    LOCAL_KTRACE2("sched_preempt_timer_tick", (uint32_t)current_thread->user_tid,
                  current_thread->remaining_time_slice);

    // did this tick complete the time slice?
    DEBUG_ASSERT(now > current_thread->last_started_running);
    zx_duration_t delta = zx_time_sub_time(now, current_thread->last_started_running);
    if (delta >= current_thread->remaining_time_slice) {
        // we completed the time slice, do not restart it and let the scheduler run
        current_thread->remaining_time_slice = 0;

        // set a timer to go off on the time slice interval from now
        timer_preempt_reset(zx_time_add_duration(now, THREAD_INITIAL_TIME_SLICE));

        // Mark a reschedule as pending. The irq handler will call back
        // into us with sched_preempt().
        thread_preempt_set_pending();
    } else {
        // the timer tick must have fired early, reschedule and continue
        zx_time_t deadline = zx_time_add_duration(current_thread->last_started_running,
                                                  current_thread->remaining_time_slice);
        timer_preempt_reset(deadline);
    }
}

// On ARM64 with safe-stack, it's no longer possible to use the unsafe-sp
// after set_current_thread (we'd now see newthread's unsafe-sp instead!).
// Hence this function and everything it calls between this point and the
// low-level context switch must be marked with __NO_SAFESTACK.
__NO_SAFESTACK static void final_context_switch(thread_t* oldthread,
                                                thread_t* newthread) {
    set_current_thread(newthread);
    arch_context_switch(oldthread, newthread);
}

// Internal reschedule routine. The current thread needs to already be in whatever
// state and queues it needs to be in. This routine simply picks the next thread and
// switches to it.
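// Expects interrupts to be disabled and the thread lock to be held; the caller is
// responsible for having already queued the current thread or left it in a blocked state.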
void sched_resched_internal() {
    thread_t* current_thread = get_current_thread();
    uint cpu = arch_curr_cpu_num();

    DEBUG_ASSERT(arch_ints_disabled());
    DEBUG_ASSERT(spin_lock_held(&thread_lock));
    DEBUG_ASSERT_MSG(current_thread->state != THREAD_RUNNING, "state %d\n", current_thread->state);
    DEBUG_ASSERT(!arch_blocking_disallowed());

    CPU_STATS_INC(reschedules);

    // pick a new thread to run
    thread_t* newthread = sched_get_top_thread(cpu);

    DEBUG_ASSERT(newthread);

    newthread->state = THREAD_RUNNING;

    thread_t* oldthread = current_thread;
    oldthread->preempt_pending = false;

    LOCAL_KTRACE2("resched old pri", (uint32_t)oldthread->user_tid, oldthread->effec_priority);
    LOCAL_KTRACE2("resched new pri", (uint32_t)newthread->user_tid, newthread->effec_priority);

    // call this even if we're not changing threads, to handle the case where another
    // core rescheduled us but the work disappeared before we got to run.
    mp_prepare_current_cpu_idle_state(thread_is_idle(newthread));

    // if it's the same thread as we're already running, exit
    if (newthread == oldthread) {
        return;
    }

    zx_time_t now = current_time();

    // account for time used on the old thread
    DEBUG_ASSERT(now >= oldthread->last_started_running);
    zx_duration_t old_runtime = zx_time_sub_time(now, oldthread->last_started_running);
    oldthread->runtime_ns = zx_duration_add_duration(oldthread->runtime_ns, old_runtime);
    oldthread->remaining_time_slice = zx_duration_sub_duration(
        oldthread->remaining_time_slice, MIN(old_runtime, oldthread->remaining_time_slice));

    // set up quantum for the new thread if it was consumed
    if (newthread->remaining_time_slice == 0) {
        newthread->remaining_time_slice = THREAD_INITIAL_TIME_SLICE;
    }

    newthread->last_started_running = now;

    // mark the cpu ownership of the threads
    if (oldthread->state != THREAD_READY) {
        oldthread->curr_cpu = INVALID_CPU;
    }
    newthread->last_cpu = cpu;
    newthread->curr_cpu = cpu;

    // if we selected the idle thread the cpu's run queue must be empty, so mark the
    // cpu as idle
    if (thread_is_idle(newthread)) {
        mp_set_cpu_idle(cpu);
    }

    if (thread_is_realtime(newthread)) {
        mp_set_cpu_realtime(cpu);
    } else {
        mp_set_cpu_non_realtime(cpu);
    }

    CPU_STATS_INC(context_switches);

    if (thread_is_idle(oldthread)) {
        zx_duration_t delta = zx_time_sub_time(now, oldthread->last_started_running);
        percpu[cpu].stats.idle_time = zx_duration_add_duration(percpu[cpu].stats.idle_time, delta);
    }

    LOCAL_KTRACE2("CS timeslice old", (uint32_t)oldthread->user_tid, oldthread->remaining_time_slice);
    LOCAL_KTRACE2("CS timeslice new", (uint32_t)newthread->user_tid, newthread->remaining_time_slice);

    ktrace(TAG_CONTEXT_SWITCH, (uint32_t)newthread->user_tid,
           (cpu | (oldthread->state << 8) | (oldthread->effec_priority << 16) |
            (newthread->effec_priority << 24)),
           (uint32_t)(uintptr_t)oldthread, (uint32_t)(uintptr_t)newthread);

    if (thread_is_real_time_or_idle(newthread)) {
        if (!thread_is_real_time_or_idle(oldthread)) {
            // if we're switching from a non real time to a real time, cancel
            // the preemption timer.
TRACE_CONTEXT_SWITCH("stop preempt, cpu %u, old %p (%s), new %p (%s)\n", cpu, oldthread, oldthread->name, newthread, newthread->name); timer_preempt_cancel(); } } else { // set up a one shot timer to handle the remaining time slice on this thread TRACE_CONTEXT_SWITCH("start preempt, cpu %u, old %p (%s), new %p (%s)\n", cpu, oldthread, oldthread->name, newthread, newthread->name); // make sure the time slice is reasonable DEBUG_ASSERT(newthread->remaining_time_slice > 0 && newthread->remaining_time_slice < ZX_SEC(1)); timer_preempt_reset(zx_time_add_duration(now, newthread->remaining_time_slice)); } // set some optional target debug leds target_set_debug_led(0, !thread_is_idle(newthread)); TRACE_CONTEXT_SWITCH("cpu %u old %p (%s, pri %d [%d:%d], flags 0x%x) " "new %p (%s, pri %d [%d:%d], flags 0x%x)\n", cpu, oldthread, oldthread->name, oldthread->effec_priority, oldthread->base_priority, oldthread->priority_boost, oldthread->flags, newthread, newthread->name, newthread->effec_priority, newthread->base_priority, newthread->priority_boost, newthread->flags); // see if we need to swap mmu context if (newthread->aspace != oldthread->aspace) { vmm_context_switch(oldthread->aspace, newthread->aspace); } // do the low level context switch final_context_switch(oldthread, newthread); } void sched_init_early() { // initialize the run queues for (unsigned int cpu = 0; cpu < SMP_MAX_CPUS; cpu++) for (unsigned int i = 0; i < NUM_PRIORITIES; i++) { list_initialize(&percpu[cpu].run_queue[i]); } }