1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * itmt.c: Support Intel Turbo Boost Max Technology 3.0
4 *
5 * (C) Copyright 2016 Intel Corporation
6 * Author: Tim Chen <tim.c.chen@linux.intel.com>
7 *
8 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
9 * the maximum turbo frequencies of some cores in a CPU package may be
10 * higher than for the other cores in the same package.  In that case,
11 * better performance can be achieved by making the scheduler prefer
12 * to run tasks on the CPUs with higher max turbo frequencies.
13 *
 * This file provides functions and data structures for enabling the
 * scheduler to favor scheduling on cores that can be boosted to a higher
 * frequency under ITMT.
17 */
18
19#include <linux/sched.h>
20#include <linux/cpumask.h>
21#include <linux/cpuset.h>
22#include <linux/mutex.h>
23#include <linux/sysctl.h>
24#include <linux/nodemask.h>
25
26static DEFINE_MUTEX(itmt_update_mutex);
27DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
28
29/* Boolean to track if system has ITMT capabilities */
30static bool __read_mostly sched_itmt_capable;
31
32/*
33 * Boolean to control whether we want to move processes to cpu capable
34 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
35 * Technology 3.0.
36 *
37 * It can be set via /proc/sys/kernel/sched_itmt_enabled
38 */
39unsigned int __read_mostly sysctl_sched_itmt_enabled;
40
41static int sched_itmt_update_handler(struct ctl_table *table, int write,
42				     void *buffer, size_t *lenp, loff_t *ppos)
43{
44	unsigned int old_sysctl;
45	int ret;
46
47	mutex_lock(&itmt_update_mutex);
48
49	if (!sched_itmt_capable) {
50		mutex_unlock(&itmt_update_mutex);
51		return -EINVAL;
52	}
53
54	old_sysctl = sysctl_sched_itmt_enabled;
55	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
56
57	if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
58		x86_topology_update = true;
59		rebuild_sched_domains();
60	}
61
62	mutex_unlock(&itmt_update_mutex);
63
64	return ret;
65}
66
67static struct ctl_table itmt_kern_table[] = {
68	{
69		.procname	= "sched_itmt_enabled",
70		.data		= &sysctl_sched_itmt_enabled,
71		.maxlen		= sizeof(unsigned int),
72		.mode		= 0644,
73		.proc_handler	= sched_itmt_update_handler,
74		.extra1		= SYSCTL_ZERO,
75		.extra2		= SYSCTL_ONE,
76	},
77};
78
79static struct ctl_table_header *itmt_sysctl_header;
80
81/**
82 * sched_set_itmt_support() - Indicate platform supports ITMT
83 *
84 * This function is used by the OS to indicate to scheduler that the platform
85 * is capable of supporting the ITMT feature.
86 *
87 * The current scheme has the pstate driver detects if the system
88 * is ITMT capable and call sched_set_itmt_support.
89 *
90 * This must be done only after sched_set_itmt_core_prio
91 * has been called to set the cpus' priorities.
92 * It must not be called with cpu hot plug lock
93 * held as we need to acquire the lock to rebuild sched domains
94 * later.
95 *
96 * Return: 0 on success
97 */
98int sched_set_itmt_support(void)
99{
100	mutex_lock(&itmt_update_mutex);
101
102	if (sched_itmt_capable) {
103		mutex_unlock(&itmt_update_mutex);
104		return 0;
105	}
106
107	itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table);
108	if (!itmt_sysctl_header) {
109		mutex_unlock(&itmt_update_mutex);
110		return -ENOMEM;
111	}
112
113	sched_itmt_capable = true;
114
115	sysctl_sched_itmt_enabled = 1;
116
117	x86_topology_update = true;
118	rebuild_sched_domains();
119
120	mutex_unlock(&itmt_update_mutex);
121
122	return 0;
123}
124
125/**
126 * sched_clear_itmt_support() - Revoke platform's support of ITMT
127 *
128 * This function is used by the OS to indicate that it has
129 * revoked the platform's support of ITMT feature.
130 *
131 * It must not be called with cpu hot plug lock
132 * held as we need to acquire the lock to rebuild sched domains
133 * later.
134 */
135void sched_clear_itmt_support(void)
136{
137	mutex_lock(&itmt_update_mutex);
138
139	if (!sched_itmt_capable) {
140		mutex_unlock(&itmt_update_mutex);
141		return;
142	}
143	sched_itmt_capable = false;
144
145	if (itmt_sysctl_header) {
146		unregister_sysctl_table(itmt_sysctl_header);
147		itmt_sysctl_header = NULL;
148	}
149
150	if (sysctl_sched_itmt_enabled) {
151		/* disable sched_itmt if we are no longer ITMT capable */
152		sysctl_sched_itmt_enabled = 0;
153		x86_topology_update = true;
154		rebuild_sched_domains();
155	}
156
157	mutex_unlock(&itmt_update_mutex);
158}
159
160int arch_asym_cpu_priority(int cpu)
161{
162	return per_cpu(sched_core_priority, cpu);
163}
164
165/**
166 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
167 * @prio:	Priority of @cpu
168 * @cpu:	The CPU number
169 *
170 * The pstate driver will find out the max boost frequency
171 * and call this function to set a priority proportional
172 * to the max boost frequency. CPUs with higher boost
173 * frequency will receive higher priority.
174 *
175 * No need to rebuild sched domain after updating
176 * the CPU priorities. The sched domains have no
177 * dependency on CPU priorities.
178 */
179void sched_set_itmt_core_prio(int prio, int cpu)
180{
181	per_cpu(sched_core_priority, cpu) = prio;
182}
183