1// SPDX-License-Identifier: GPL-2.0-only
2
3/*
4 * A simple wrapper around refcount. An allocated sched_core_cookie's
5 * address is used to compute the cookie of the task.
6 */
struct sched_core_cookie {
	/*
	 * Number of holders of this cookie. Set to 1 on allocation, bumped by
	 * sched_core_get_cookie() and dropped by sched_core_put_cookie(),
	 * which frees the object when the count reaches zero.
	 */
	refcount_t refcnt;
};
10
11static unsigned long sched_core_alloc_cookie(void)
12{
13	struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL);
14	if (!ck)
15		return 0;
16
17	refcount_set(&ck->refcnt, 1);
18	sched_core_get();
19
20	return (unsigned long)ck;
21}
22
23static void sched_core_put_cookie(unsigned long cookie)
24{
25	struct sched_core_cookie *ptr = (void *)cookie;
26
27	if (ptr && refcount_dec_and_test(&ptr->refcnt)) {
28		kfree(ptr);
29		sched_core_put();
30	}
31}
32
33static unsigned long sched_core_get_cookie(unsigned long cookie)
34{
35	struct sched_core_cookie *ptr = (void *)cookie;
36
37	if (ptr)
38		refcount_inc(&ptr->refcnt);
39
40	return cookie;
41}
42
43/*
44 * sched_core_update_cookie - replace the cookie on a task
45 * @p: the task to update
46 * @cookie: the new cookie
47 *
48 * Effectively exchange the task cookie; caller is responsible for lifetimes on
49 * both ends.
50 *
51 * Returns: the old cookie
52 */
static unsigned long sched_core_update_cookie(struct task_struct *p,
					      unsigned long cookie)
{
	unsigned long old_cookie;
	struct rq_flags rf;
	struct rq *rq;

	/*
	 * Everything up to task_rq_unlock() below runs with @p's runqueue
	 * locked, so the dequeue / swap / enqueue sequence is seen as one step.
	 */
	rq = task_rq_lock(p, &rf);

	/*
	 * Since creating a cookie implies sched_core_get(), and we cannot set
	 * a cookie until after we've created it, similarly, we cannot destroy
	 * a cookie until after we've removed it, we must have core scheduling
	 * enabled here.
	 */
	SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq));

	/*
	 * Remove @p from core-scheduling tracking while it still carries the
	 * old cookie; it is re-added below once the new cookie is in place.
	 * NOTE(review): presumably the tracking structure is ordered by
	 * cookie, which is why the swap must happen while dequeued - confirm
	 * against sched_core_enqueue().
	 */
	if (sched_core_enqueued(p))
		sched_core_dequeue(rq, p, DEQUEUE_SAVE);

	old_cookie = p->core_cookie;
	p->core_cookie = cookie;

	/*
	 * Consider the cases: !old_cookie and !cookie.
	 *
	 * The enqueue is conditional on the new cookie only, so a task going
	 * from no cookie to a cookie gets enqueued here even though nothing
	 * was dequeued above, and a task whose cookie is being cleared is
	 * left out.
	 */
	if (cookie && task_on_rq_queued(p))
		sched_core_enqueue(rq, p);

	/*
	 * If task is currently running, it may not be compatible anymore after
	 * the cookie change, so enter the scheduler on its CPU to schedule it
	 * away.
	 *
	 * Note that it is possible that as a result of this cookie change, the
	 * core has now entered/left forced idle state. Defer accounting to the
	 * next scheduling edge, rather than always forcing a reschedule here.
	 */
	if (task_on_cpu(rq, p))
		resched_curr(rq);

	task_rq_unlock(rq, p, &rf);

	/* Ownership of the old cookie's reference passes to the caller. */
	return old_cookie;
}
98
99static unsigned long sched_core_clone_cookie(struct task_struct *p)
100{
101	unsigned long cookie, flags;
102
103	raw_spin_lock_irqsave(&p->pi_lock, flags);
104	cookie = sched_core_get_cookie(p->core_cookie);
105	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
106
107	return cookie;
108}
109
110void sched_core_fork(struct task_struct *p)
111{
112	RB_CLEAR_NODE(&p->core_node);
113	p->core_cookie = sched_core_clone_cookie(current);
114}
115
116void sched_core_free(struct task_struct *p)
117{
118	sched_core_put_cookie(p->core_cookie);
119}
120
/*
 * Install @cookie on @p.
 *
 * The caller keeps its own reference on @cookie: a separate reference is
 * taken here on behalf of @p, and the reference @p held on its previous
 * cookie is dropped once the swap is done.
 */
static void __sched_core_set(struct task_struct *p, unsigned long cookie)
{
	unsigned long task_ref;
	unsigned long displaced;

	task_ref = sched_core_get_cookie(cookie);
	displaced = sched_core_update_cookie(p, task_ref);
	sched_core_put_cookie(displaced);
}
127
/*
 * sched_core_share_pid - create, share or query core-scheduling cookies
 * @cmd: PR_SCHED_CORE_* operation (GET, CREATE, SHARE_TO or SHARE_FROM)
 * @pid: target pid, looked up with find_task_by_vpid(); 0 selects current
 * @type: scope of the operation (PIDTYPE_PID, PIDTYPE_TGID or PIDTYPE_PGID)
 * @uaddr: user address that receives the cookie id for PR_SCHED_CORE_GET;
 *	   must be 0 for every other command
 *
 * Called from prctl interface: PR_SCHED_CORE
 *
 * Returns: 0 on success, or
 *   -ENODEV  sched_smt_present is not set
 *   -EINVAL  @type, @cmd or @pid out of range; @uaddr given for a non-GET
 *	      command; GET with a scope other than PIDTYPE_PID or with a
 *	      @uaddr that is not 8-byte aligned; SHARE_FROM with a scope
 *	      other than PIDTYPE_PID
 *   -ESRCH   no task found for @pid
 *   -EPERM   ptrace_may_access() denied access to the target task or to
 *	      any task of the target group
 *   -ENOMEM  cookie allocation failed (PR_SCHED_CORE_CREATE)
 *   or the result of put_user() for PR_SCHED_CORE_GET.
 */
int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
			 unsigned long uaddr)
{
	/* cookie == 0 means "no reference held", see the put at 'out'. */
	unsigned long cookie = 0, id = 0;
	struct task_struct *task, *p;
	struct pid *grp;
	int err = 0;

	if (!static_branch_likely(&sched_smt_present))
		return -ENODEV;

	/* @type is used directly as a pid_type, so the values must match. */
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID);
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID);
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID);

	if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
	    (cmd != PR_SCHED_CORE_GET && uaddr))
		return -EINVAL;

	/*
	 * Look the task up under RCU and take a task reference before
	 * leaving the read-side section; it is dropped at 'out'.
	 */
	rcu_read_lock();
	if (pid == 0) {
		task = current;
	} else {
		task = find_task_by_vpid(pid);
		if (!task) {
			rcu_read_unlock();
			return -ESRCH;
		}
	}
	get_task_struct(task);
	rcu_read_unlock();

	/*
	 * Check if this process has the right to modify the specified
	 * process. Use the regular "ptrace_may_access()" checks.
	 */
	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
		err = -EPERM;
		goto out;
	}

	/*
	 * Each case that obtains a cookie leaves a reference in 'cookie' that
	 * belongs to this function and is released at 'out'. GET and
	 * SHARE_FROM finish inside the switch; CREATE and SHARE_TO fall
	 * through to the code that applies the cookie to the target scope.
	 */
	switch (cmd) {
	case PR_SCHED_CORE_GET:
		if (type != PIDTYPE_PID || uaddr & 7) {
			err = -EINVAL;
			goto out;
		}
		cookie = sched_core_clone_cookie(task);
		if (cookie) {
			/*
			 * User space is handed a hash of the cookie address,
			 * not the address itself; id stays 0 when the task
			 * has no cookie.
			 */
			/* XXX improve ? */
			ptr_to_hashval((void *)cookie, &id);
		}
		err = put_user(id, (u64 __user *)uaddr);
		goto out;

	case PR_SCHED_CORE_CREATE:
		cookie = sched_core_alloc_cookie();
		if (!cookie) {
			err = -ENOMEM;
			goto out;
		}
		break;

	case PR_SCHED_CORE_SHARE_TO:
		/* Push the caller's own cookie (possibly 0) to the target. */
		cookie = sched_core_clone_cookie(current);
		break;

	case PR_SCHED_CORE_SHARE_FROM:
		if (type != PIDTYPE_PID) {
			err = -EINVAL;
			goto out;
		}
		/* Pull the target's cookie (possibly 0) onto the caller. */
		cookie = sched_core_clone_cookie(task);
		__sched_core_set(current, cookie);
		goto out;

	default:
		err = -EINVAL;
		goto out;
	}

	/* Single-task scope: no need to walk a group. */
	if (type == PIDTYPE_PID) {
		__sched_core_set(task, cookie);
		goto out;
	}

	/*
	 * Group scope. Both walks happen under one tasklist_lock read section.
	 * The first pass only checks permissions, so a denial on any member
	 * aborts before a single task has been modified.
	 */
	read_lock(&tasklist_lock);
	grp = task_pid_type(task, type);

	do_each_pid_thread(grp, type, p) {
		if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) {
			err = -EPERM;
			goto out_tasklist;
		}
	} while_each_pid_thread(grp, type, p);

	/* Second pass: every member passed the check, apply the cookie. */
	do_each_pid_thread(grp, type, p) {
		__sched_core_set(p, cookie);
	} while_each_pid_thread(grp, type, p);
out_tasklist:
	read_unlock(&tasklist_lock);

out:
	/*
	 * Drop this function's own reference; tasks that were given the
	 * cookie hold theirs via __sched_core_set(). A zero cookie makes this
	 * a no-op.
	 */
	sched_core_put_cookie(cookie);
	put_task_struct(task);
	return err;
}
236
237#ifdef CONFIG_SCHEDSTATS
238
239/* REQUIRES: rq->core's clock recently updated. */
240void __sched_core_account_forceidle(struct rq *rq)
241{
242	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
243	u64 delta, now = rq_clock(rq->core);
244	struct rq *rq_i;
245	struct task_struct *p;
246	int i;
247
248	lockdep_assert_rq_held(rq);
249
250	WARN_ON_ONCE(!rq->core->core_forceidle_count);
251
252	if (rq->core->core_forceidle_start == 0)
253		return;
254
255	delta = now - rq->core->core_forceidle_start;
256	if (unlikely((s64)delta <= 0))
257		return;
258
259	rq->core->core_forceidle_start = now;
260
261	if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
262		/* can't be forced idle without a running task */
263	} else if (rq->core->core_forceidle_count > 1 ||
264		   rq->core->core_forceidle_occupation > 1) {
265		/*
266		 * For larger SMT configurations, we need to scale the charged
267		 * forced idle amount since there can be more than one forced
268		 * idle sibling and more than one running cookied task.
269		 */
270		delta *= rq->core->core_forceidle_count;
271		delta = div_u64(delta, rq->core->core_forceidle_occupation);
272	}
273
274	for_each_cpu(i, smt_mask) {
275		rq_i = cpu_rq(i);
276		p = rq_i->core_pick ?: rq_i->curr;
277
278		if (p == rq_i->idle)
279			continue;
280
281		/*
282		 * Note: this will account forceidle to the current cpu, even
283		 * if it comes from our SMT sibling.
284		 */
285		__account_forceidle_time(p, delta);
286	}
287}
288
289void __sched_core_tick(struct rq *rq)
290{
291	if (!rq->core->core_forceidle_count)
292		return;
293
294	if (rq != rq->core)
295		update_rq_clock(rq->core);
296
297	__sched_core_account_forceidle(rq);
298}
299
300#endif /* CONFIG_SCHEDSTATS */
301