/*
 * Copyright 2014, General Dynamics C4 Systems
 *
 * This software may be distributed and modified according to the terms of
 * the GNU General Public License version 2. Note that NO WARRANTY is provided.
 * See "LICENSE_GPLv2.txt" for details.
 *
 * @TAG(GD_GPL)
 */

#include <config.h>
#include <object.h>
#include <util.h>
#include <api/faults.h>
#include <api/types.h>
#include <kernel/cspace.h>
#include <kernel/thread.h>
#include <kernel/vspace.h>
#include <object/schedcontext.h>
#include <model/statedata.h>
#include <arch/machine.h>
#include <arch/kernel/thread.h>
#include <machine/registerset.h>
#include <linker.h>

static seL4_MessageInfo_t
transferCaps(seL4_MessageInfo_t info, extra_caps_t caps,
             endpoint_t *endpoint, tcb_t *receiver,
             word_t *receiveBuffer);

static inline bool_t PURE
isBlocked(const tcb_t *thread)
{
    switch (thread_state_get_tsType(thread->tcbState)) {
    case ThreadState_Inactive:
    case ThreadState_BlockedOnReceive:
    case ThreadState_BlockedOnSend:
    case ThreadState_BlockedOnNotification:
    case ThreadState_BlockedOnReply:
        return true;

    default:
        return false;
    }
}

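/* Under MCS, a thread is schedulable only if it is runnable, has a scheduling
 * context with at least one configured refill, and is not sitting in the
 * release queue awaiting its next refill. */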
static inline bool_t PURE
isSchedulable(const tcb_t *thread)
{
    return isRunnable(thread) &&
           thread->tcbSchedContext != NULL &&
           thread->tcbSchedContext->scRefillMax > 0 &&
           !thread_state_get_tcbInReleaseQueue(thread->tcbState);
}

BOOT_CODE void
configureIdleThread(tcb_t *tcb)
{
    Arch_configureIdleThread(tcb);
    setThreadState(tcb, ThreadState_IdleThreadState);
}

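/* Prepare the current thread for return to user level: complete any pending
 * schedContext_yieldTo, turn a Restart state into Running by copying the
 * restart PC into the next PC, and hand the idle thread to the architecture
 * layer. A blocked current thread at this point is a kernel bug. */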
void
activateThread(void)
{
    if (unlikely(NODE_STATE(ksCurThread)->tcbYieldTo)) {
        schedContext_completeYieldTo(NODE_STATE(ksCurThread));
        assert(thread_state_get_tsType(NODE_STATE(ksCurThread)->tcbState) == ThreadState_Running);
    }

    switch (thread_state_get_tsType(NODE_STATE(ksCurThread)->tcbState)) {
    case ThreadState_Running:
#ifdef CONFIG_VTX
    case ThreadState_RunningVM:
#endif
        break;

    case ThreadState_Restart: {
        word_t pc;

        pc = getRestartPC(NODE_STATE(ksCurThread));
        setNextPC(NODE_STATE(ksCurThread), pc);
        setThreadState(NODE_STATE(ksCurThread), ThreadState_Running);
        break;
    }

    case ThreadState_IdleThreadState:
        Arch_activateIdleThread(NODE_STATE(ksCurThread));
        break;

    default:
        fail("Current thread is blocked");
    }
}

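/* Take a thread out of action: cancel any IPC it is blocked on, mark it
 * Inactive, remove it from the scheduler and release queues, and cancel any
 * schedContext_yieldTo it has outstanding. */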
void
suspend(tcb_t *target)
{
    cancelIPC(target);
    setThreadState(target, ThreadState_Inactive);
    tcbSchedDequeue(target);
    tcbReleaseRemove(target);
    schedContext_cancelYieldTo(target);
}

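/* Make a blocked thread runnable again: cancel the IPC it is blocked on, set
 * it to Restart so that it re-attempts the interrupted operation, resume its
 * scheduling context and, if it is now schedulable, offer it to the scheduler
 * via possibleSwitchTo. */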
void
restart(tcb_t *target)
{
    if (isBlocked(target)) {
        cancelIPC(target);
        setThreadState(target, ThreadState_Restart);
        schedContext_resume(target->tcbSchedContext);
        if (isSchedulable(target)) {
            possibleSwitchTo(target);
        }
    }
}

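/* Transfer a message from sender to receiver: a normal message transfer when
 * the sender has no pending fault, otherwise a fault message describing the
 * sender's fault. */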
void
doIPCTransfer(tcb_t *sender, endpoint_t *endpoint, word_t badge,
              bool_t grant, tcb_t *receiver)
{
    void *receiveBuffer, *sendBuffer;

    receiveBuffer = lookupIPCBuffer(true, receiver);

    if (likely(seL4_Fault_get_seL4_FaultType(sender->tcbFault) == seL4_Fault_NullFault)) {
        sendBuffer = lookupIPCBuffer(false, sender);
        doNormalTransfer(sender, sendBuffer, endpoint, badge, grant,
                         receiver, receiveBuffer);
    } else {
        doFaultTransfer(badge, sender, receiver, receiveBuffer);
    }
}

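/* Handle a reply arriving on a reply object: deliver the reply (or fault
 * reply) to the thread blocked on the reply object, make it runnable where
 * appropriate, and then either switch to it, raise a timeout fault on it, or
 * postpone it, depending on the state of its scheduling context. */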
void
doReplyTransfer(tcb_t *sender, reply_t *reply)
{
    if (reply->replyTCB == NULL ||
            thread_state_get_tsType(reply->replyTCB->tcbState) != ThreadState_BlockedOnReply) {
        /* nothing to do */
        return;
    }

    tcb_t *receiver = reply->replyTCB;
    reply_remove(reply);
    assert(thread_state_get_replyObject(receiver->tcbState) == REPLY_REF(0));
    assert(reply->replyTCB == NULL);

    word_t fault_type = seL4_Fault_get_seL4_FaultType(receiver->tcbFault);
    if (likely(fault_type == seL4_Fault_NullFault)) {
        doIPCTransfer(sender, NULL, 0, true, receiver);
        setThreadState(receiver, ThreadState_Running);
    } else {
        bool_t restart = handleFaultReply(receiver, sender);
        receiver->tcbFault = seL4_Fault_NullFault_new();
        if (restart) {
            setThreadState(receiver, ThreadState_Restart);
        } else {
            setThreadState(receiver, ThreadState_Inactive);
        }
    }

    if (receiver->tcbSchedContext && isRunnable(receiver)) {
        if ((refill_ready(receiver->tcbSchedContext) && refill_sufficient(receiver->tcbSchedContext, 0))) {
            possibleSwitchTo(receiver);
        } else {
            if (validTimeoutHandler(receiver) && fault_type != seL4_Fault_Timeout) {
                current_fault = seL4_Fault_Timeout_new(receiver->tcbSchedContext->scBadge);
                handleTimeout(receiver);
            } else {
                postpone(receiver->tcbSchedContext);
            }
        }
    }
}

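/* Copy message registers, and any extra caps the sender is allowed to grant,
 * from sender to receiver, then write the resulting message info and badge
 * into the receiver's registers. */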
void
doNormalTransfer(tcb_t *sender, word_t *sendBuffer, endpoint_t *endpoint,
                 word_t badge, bool_t canGrant, tcb_t *receiver,
                 word_t *receiveBuffer)
{
    word_t msgTransferred;
    seL4_MessageInfo_t tag;
    exception_t status;
    extra_caps_t caps;

    tag = messageInfoFromWord(getRegister(sender, msgInfoRegister));

    if (canGrant) {
        status = lookupExtraCaps(sender, sendBuffer, tag);
        caps = current_extra_caps;
        if (unlikely(status != EXCEPTION_NONE)) {
            caps.excaprefs[0] = NULL;
        }
    } else {
        caps = current_extra_caps;
        caps.excaprefs[0] = NULL;
    }

    msgTransferred = copyMRs(sender, sendBuffer, receiver, receiveBuffer,
                             seL4_MessageInfo_get_length(tag));

    tag = transferCaps(tag, caps, endpoint, receiver, receiveBuffer);

    tag = seL4_MessageInfo_set_length(tag, msgTransferred);
    setRegister(receiver, msgInfoRegister, wordFromMessageInfo(tag));
    setRegister(receiver, badgeRegister, badge);
}

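/* Write a fault message describing the sender's pending fault into the
 * receiver's registers/IPC buffer; the fault type is carried in the message
 * label. */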
void
doFaultTransfer(word_t badge, tcb_t *sender, tcb_t *receiver,
                word_t *receiverIPCBuffer)
{
    word_t sent;
    seL4_MessageInfo_t msgInfo;

    sent = setMRs_fault(sender, receiver, receiverIPCBuffer);
    msgInfo = seL4_MessageInfo_new(
                  seL4_Fault_get_seL4_FaultType(sender->tcbFault), 0, 0, sent);
    setRegister(receiver, msgInfoRegister, wordFromMessageInfo(msgInfo));
    setRegister(receiver, badgeRegister, badge);
}

/* Like getReceiveSlots, this is specialised for single-cap transfer. */
static seL4_MessageInfo_t
transferCaps(seL4_MessageInfo_t info, extra_caps_t caps,
             endpoint_t *endpoint, tcb_t *receiver,
             word_t *receiveBuffer)
{
    word_t i;
    cte_t* destSlot;

    info = seL4_MessageInfo_set_extraCaps(info, 0);
    info = seL4_MessageInfo_set_capsUnwrapped(info, 0);

    if (likely(!caps.excaprefs[0] || !receiveBuffer)) {
        return info;
    }

    destSlot = getReceiveSlots(receiver, receiveBuffer);

    for (i = 0; i < seL4_MsgMaxExtraCaps && caps.excaprefs[i] != NULL; i++) {
        cte_t *slot = caps.excaprefs[i];
        cap_t cap = slot->cap;

        if (cap_get_capType(cap) == cap_endpoint_cap &&
                EP_PTR(cap_endpoint_cap_get_capEPPtr(cap)) == endpoint) {
            /* If this is a cap to the endpoint on which the message was sent,
             * only transfer the badge, not the cap. */
            setExtraBadge(receiveBuffer,
                          cap_endpoint_cap_get_capEPBadge(cap), i);

            info = seL4_MessageInfo_set_capsUnwrapped(info,
                                                      seL4_MessageInfo_get_capsUnwrapped(info) | (1 << i));

        } else {
            deriveCap_ret_t dc_ret;

            if (!destSlot) {
                break;
            }

            dc_ret = deriveCap(slot, cap);

            if (dc_ret.status != EXCEPTION_NONE) {
                break;
            }
            if (cap_get_capType(dc_ret.cap) == cap_null_cap) {
                break;
            }

            cteInsert(dc_ret.cap, slot, destSlot);

            destSlot = NULL;
        }
    }

    return seL4_MessageInfo_set_extraCaps(info, i);
}

void doNBRecvFailedTransfer(tcb_t *thread)
{
    /* Set the badge register to 0 to indicate there was no message */
    setRegister(thread, badgeRegister, 0);
}

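/* Move to the next entry of the domain schedule, wrapping around at the end,
 * and reset the remaining time for the new domain. */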
static void
nextDomain(void)
{
    ksDomScheduleIdx++;
    if (ksDomScheduleIdx >= ksDomScheduleLength) {
        ksDomScheduleIdx = 0;
    }
    NODE_STATE(ksReprogram) = true;
    ksWorkUnitsCompleted = 0;
    ksCurDomain = ksDomSchedule[ksDomScheduleIdx].domain;
    ksDomainTime = usToTicks(ksDomSchedule[ksDomScheduleIdx].length * US_IN_MS);
}

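/* Switch ksCurSC to the scheduling context of the thread about to run. If the
 * scheduling context changes, its refills are unblocked and the timer must be
 * reprogrammed; consumed time is either committed to the outgoing scheduling
 * context or rolled back, depending on whether a reprogram is already
 * pending. */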
static void
switchSchedContext(void)
{
    if (unlikely(NODE_STATE(ksCurSC) != NODE_STATE(ksCurThread)->tcbSchedContext)) {
        NODE_STATE(ksReprogram) = true;
        refill_unblock_check(NODE_STATE(ksCurThread->tcbSchedContext));

        assert(refill_ready(NODE_STATE(ksCurThread->tcbSchedContext)));
        assert(refill_sufficient(NODE_STATE(ksCurThread->tcbSchedContext), 0));
    }

    if (NODE_STATE(ksReprogram)) {
        /* if we are reprogramming, we have acted on the new kernel time and cannot
         * roll back -> charge the current thread */
        commitTime();
    } else {
        /* otherwise, we don't need to do anything - avoid reprogramming the timer */
        rollbackTime();
    }

    /* if a thread doesn't have enough budget, it should not be in the scheduler */
    assert((refill_ready(NODE_STATE(ksCurSC)) && refill_sufficient(NODE_STATE(ksCurSC), 0))
           || !thread_state_get_tcbQueued(NODE_STATE(ksCurSC)->scTcb->tcbState));

    NODE_STATE(ksCurSC) = NODE_STATE(ksCurThread)->tcbSchedContext;
}

static void
scheduleChooseNewThread(void)
{
    if (ksDomainTime == 0) {
        nextDomain();
    }
    chooseThread();
}

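/* Main scheduling decision point, invoked on the way out of the kernel: wake
 * any threads whose release time has passed, act on ksSchedulerAction (resume
 * the current thread, switch to a specific candidate, or choose a new thread),
 * switch scheduling contexts, and reprogram the timer if required. */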
void
schedule(void)
{

    awaken();

    if (NODE_STATE(ksSchedulerAction) != SchedulerAction_ResumeCurrentThread) {
        bool_t was_runnable;
        if (isSchedulable(NODE_STATE(ksCurThread))) {
            was_runnable = true;
            SCHED_ENQUEUE_CURRENT_TCB;
        } else {
            was_runnable = false;
        }

        if (NODE_STATE(ksSchedulerAction) == SchedulerAction_ChooseNewThread) {
            scheduleChooseNewThread();
        } else {
            tcb_t *candidate = NODE_STATE(ksSchedulerAction);
            assert(isSchedulable(candidate));
            /* Avoid checking bitmap when ksCurThread is higher prio, to
             * match fast path.
             * Don't look at ksCurThread prio when it's idle, to respect
             * information flow in non-fastpath cases. */
            bool_t fastfail =
                NODE_STATE(ksCurThread) == NODE_STATE(ksIdleThread)
                || (candidate->tcbPriority < NODE_STATE(ksCurThread)->tcbPriority);
            if (fastfail &&
                    !isHighestPrio(ksCurDomain, candidate->tcbPriority)) {
                SCHED_ENQUEUE(candidate);
                /* we can't switch to the candidate directly, so a full reschedule is needed */
                NODE_STATE(ksSchedulerAction) = SchedulerAction_ChooseNewThread;
                scheduleChooseNewThread();
            } else if (was_runnable && candidate->tcbPriority == NODE_STATE(ksCurThread)->tcbPriority) {
                /* Append the candidate to the end of the scheduling queue, so that the
                 * current thread, which was enqueued at the head of the scheduling queue,
                 * is picked during chooseNewThread */
                SCHED_APPEND(candidate);
                NODE_STATE(ksSchedulerAction) = SchedulerAction_ChooseNewThread;
                scheduleChooseNewThread();
            } else {
                assert(candidate != NODE_STATE(ksCurThread));
                switchToThread(candidate);
            }
        }
    }
    NODE_STATE(ksSchedulerAction) = SchedulerAction_ResumeCurrentThread;
#ifdef ENABLE_SMP_SUPPORT
    doMaskReschedule(ARCH_NODE_STATE(ipiReschedulePending));
    ARCH_NODE_STATE(ipiReschedulePending) = 0;
#endif /* ENABLE_SMP_SUPPORT */

    switchSchedContext();

    if (NODE_STATE(ksReprogram)) {
        setNextInterrupt();
        NODE_STATE(ksReprogram) = false;
    }
}

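/* Pick the highest-priority thread from the current domain's ready queues, or
 * the idle thread if the ready queues are empty. */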
void
chooseThread(void)
{
    word_t prio;
    word_t dom;
    tcb_t *thread;

    if (CONFIG_NUM_DOMAINS > 1) {
        dom = ksCurDomain;
    } else {
        dom = 0;
    }

    if (likely(NODE_STATE(ksReadyQueuesL1Bitmap[dom]))) {
        prio = getHighestPrio(dom);
        thread = NODE_STATE(ksReadyQueues)[ready_queues_index(dom, prio)].head;
        assert(thread);
        assert(isSchedulable(thread));
        assert(refill_sufficient(thread->tcbSchedContext, 0));
        assert(refill_ready(thread->tcbSchedContext));
        switchToThread(thread);
    } else {
        switchToIdleThread();
    }
}

void
switchToThread(tcb_t *thread)
{
    assert(thread->tcbSchedContext != NULL);
    assert(!thread_state_get_tcbInReleaseQueue(thread->tcbState));
    assert(refill_sufficient(thread->tcbSchedContext, 0));
    assert(refill_ready(thread->tcbSchedContext));

#ifdef CONFIG_BENCHMARK_TRACK_UTILISATION
    benchmark_utilisation_switch(NODE_STATE(ksCurThread), thread);
#endif
    Arch_switchToThread(thread);
    tcbSchedDequeue(thread);
    NODE_STATE(ksCurThread) = thread;
}

void
switchToIdleThread(void)
{
#ifdef CONFIG_BENCHMARK_TRACK_UTILISATION
    benchmark_utilisation_switch(NODE_STATE(ksCurThread), NODE_STATE(ksIdleThread));
#endif
    Arch_switchToIdleThread();
    NODE_STATE(ksCurThread) = NODE_STATE(ksIdleThread);
}

void
setDomain(tcb_t *tptr, dom_t dom)
{
    tcbSchedDequeue(tptr);
    tptr->tcbDomain = dom;
    if (isSchedulable(tptr)) {
        SCHED_ENQUEUE(tptr);
    }
    if (tptr == NODE_STATE(ksCurThread)) {
        rescheduleRequired();
    }
}

void
setMCPriority(tcb_t *tptr, prio_t mcp)
{
    tptr->tcbMCP = mcp;
}

void
setPriority(tcb_t *tptr, prio_t prio)
{
    tcbSchedDequeue(tptr);
    tptr->tcbPriority = prio;

    switch (thread_state_get_tsType(tptr->tcbState)) {
    case ThreadState_Running:
    case ThreadState_Restart:
        if (isSchedulable(tptr)) {
            SCHED_ENQUEUE(tptr);
            rescheduleRequired();
        }
        break;
    case ThreadState_BlockedOnReceive:
    case ThreadState_BlockedOnSend:
        reorderEP(EP_PTR(thread_state_get_blockingObject(tptr->tcbState)), tptr);
        break;
    case ThreadState_BlockedOnNotification:
        reorderNTFN(NTFN_PTR(thread_state_get_blockingObject(tptr->tcbState)), tptr);
        break;
    default:
        break;
    }
}

/* Note that this thread will possibly continue at the end of this kernel
 * entry. Do not queue it yet, since a queue+unqueue operation is wasteful
 * if it will be picked. Instead, it waits in 'ksSchedulerAction', on which
 * the scheduler will act. */
void
possibleSwitchTo(tcb_t* target)
{
    if (target->tcbSchedContext != NULL && !thread_state_get_tcbInReleaseQueue(target->tcbState)) {
        if (ksCurDomain != target->tcbDomain
                SMP_COND_STATEMENT( || target->tcbAffinity != getCurrentCPUIndex())) {
            SCHED_ENQUEUE(target);
        } else if (NODE_STATE(ksSchedulerAction) != SchedulerAction_ResumeCurrentThread) {
            /* Too many threads want special treatment, use regular queues. */
            rescheduleRequired();
            SCHED_ENQUEUE(target);
        } else {
            NODE_STATE(ksSchedulerAction) = target;
        }
    }
}

void
setThreadState(tcb_t *tptr, _thread_state_t ts)
{
    thread_state_ptr_set_tsType(&tptr->tcbState, ts);
    scheduleTCB(tptr);
}

void
scheduleTCB(tcb_t *tptr)
{
    if (tptr == NODE_STATE(ksCurThread) &&
            NODE_STATE(ksSchedulerAction) == SchedulerAction_ResumeCurrentThread &&
            !isSchedulable(tptr)) {
        rescheduleRequired();
    }
}

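/* Remove the scheduling context's thread from the ready queues and put it on
 * the release queue; the timer on the scheduling context's core will need to
 * be reprogrammed. */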
void
postpone(sched_context_t *sc)
{
    tcbSchedDequeue(sc->scTcb);
    tcbReleaseEnqueue(sc->scTcb);
    NODE_STATE_ON_CORE(ksReprogram, sc->scCore) = true;
}

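/* Program the timer for the earliest event of interest: expiry of the current
 * thread's head refill, the end of the current domain's time (when domain
 * scheduling is configured), or the release time of the head of the release
 * queue. */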
void
setNextInterrupt(void)
{
    time_t next_interrupt = NODE_STATE(ksCurTime) +
                            REFILL_HEAD(NODE_STATE(ksCurThread)->tcbSchedContext).rAmount;

    if (CONFIG_NUM_DOMAINS > 1) {
        next_interrupt = MIN(next_interrupt, NODE_STATE(ksCurTime) + ksDomainTime);
    }

    if (NODE_STATE(ksReleaseHead) != NULL) {
        next_interrupt = MIN(REFILL_HEAD(NODE_STATE(ksReleaseHead)->tcbSchedContext).rTime, next_interrupt);
    }

    setDeadline(next_interrupt - getTimerPrecision());
}

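/* Charge 'consumed' ticks to the current scheduling context on 'core'. For
 * round-robin scheduling contexts the budget is simply moved from the tail
 * refill back to the head; otherwise the sporadic refill accounting is
 * applied. If the charged thread is runnable on the current CPU, its
 * timeslice is ended and a reschedule is forced. */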
void
chargeBudget(ticks_t capacity, ticks_t consumed, bool_t canTimeoutFault, word_t core, bool_t isCurCPU)
{

    if (isRoundRobin(NODE_STATE_ON_CORE(ksCurSC, core))) {
        assert(refill_size(NODE_STATE_ON_CORE(ksCurSC, core)) == MIN_REFILLS);
        REFILL_HEAD(NODE_STATE_ON_CORE(ksCurSC, core)).rAmount += REFILL_TAIL(NODE_STATE_ON_CORE(ksCurSC, core)).rAmount;
        REFILL_TAIL(NODE_STATE_ON_CORE(ksCurSC, core)).rAmount = 0;
    } else {
        refill_budget_check(NODE_STATE_ON_CORE(ksCurSC, core), consumed, capacity);
    }

    assert(REFILL_HEAD(NODE_STATE_ON_CORE(ksCurSC, core)).rAmount >= MIN_BUDGET);
    NODE_STATE_ON_CORE(ksCurSC, core)->scConsumed += consumed;
    NODE_STATE_ON_CORE(ksConsumed, core) = 0;
    if (isCurCPU && likely(isRunnable(NODE_STATE_ON_CORE(ksCurThread, core)))) {
        endTimeslice(canTimeoutFault);
        rescheduleRequired();
        NODE_STATE(ksReprogram) = true;
    }
}

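/* Handle the end of the current thread's timeslice: raise a timeout fault if
 * one is permitted and a valid timeout handler exists, otherwise either append
 * the thread to the back of its ready queue (if its budget is still ready and
 * sufficient) or postpone it until its next refill. */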
void
endTimeslice(bool_t can_timeout_fault)
{
    if (unlikely(NODE_STATE(ksCurThread) == NODE_STATE(ksIdleThread))) {
        return;
    }

    assert(isRunnable(NODE_STATE(ksCurSC->scTcb)));
    if (can_timeout_fault && validTimeoutHandler(NODE_STATE(ksCurThread))) {
        current_fault = seL4_Fault_Timeout_new(NODE_STATE(ksCurSC)->scBadge);
        handleTimeout(NODE_STATE(ksCurThread));
    } else if (refill_ready(NODE_STATE(ksCurSC)) && refill_sufficient(NODE_STATE(ksCurSC), 0)) {
        /* apply round robin */
        assert(refill_sufficient(NODE_STATE(ksCurSC), 0));
        assert(!thread_state_get_tcbQueued(NODE_STATE(ksCurThread)->tcbState));
        SCHED_APPEND_CURRENT_TCB;
    } else {
        /* postpone until ready */
        postpone(NODE_STATE(ksCurSC));
    }
}

void
rescheduleRequired(void)
{
    if (NODE_STATE(ksSchedulerAction) != SchedulerAction_ResumeCurrentThread
            && NODE_STATE(ksSchedulerAction) != SchedulerAction_ChooseNewThread &&
            isSchedulable(NODE_STATE(ksSchedulerAction))) {
        assert(refill_sufficient(NODE_STATE(ksSchedulerAction)->tcbSchedContext, 0));
        assert(refill_ready(NODE_STATE(ksSchedulerAction)->tcbSchedContext));
        SCHED_ENQUEUE(NODE_STATE(ksSchedulerAction));
    }
    NODE_STATE(ksSchedulerAction) = SchedulerAction_ChooseNewThread;
}

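/* Move every thread at the head of the release queue whose head refill has
 * become ready back into contention for the CPU, and flag that the timer needs
 * reprogramming since the head of the release queue has changed. */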
void
awaken(void)
{
    while (unlikely(NODE_STATE(ksReleaseHead) != NULL && refill_ready(NODE_STATE(ksReleaseHead)->tcbSchedContext))) {
        tcb_t *awakened = tcbReleaseDequeue();
        /* the currently running thread cannot have just woken up */
        assert(awakened != NODE_STATE(ksCurThread));
        /* round robin threads should not be in the release queue */
        assert(!isRoundRobin(awakened->tcbSchedContext));
        /* threads should wake up on the correct core */
        SMP_COND_STATEMENT(assert(awakened->tcbAffinity == getCurrentCPUIndex()));
        /* a woken thread's HEAD refill should always be at least MIN_BUDGET */
        assert(refill_sufficient(awakened->tcbSchedContext, 0));
        possibleSwitchTo(awakened);
        /* changed head of release queue -> need to reprogram */
        NODE_STATE(ksReprogram) = true;
    }
}