1===================
2Block io priorities
3===================
4
5
6Intro
7-----
8
9The io priority feature enables users to io nice processes or process groups,
10similar to what has been possible with cpu scheduling for ages. Support for io
11priorities is io scheduler dependent and currently supported by bfq and
12mq-deadline.
13
14Scheduling classes
15------------------
16
17Three generic scheduling classes are implemented for io priorities that
18determine how io is served for a process.
19
20IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
21higher priority than any other in the system, processes from this class are
22given first access to the disk every time. Thus it needs to be used with some
23care, one io RT process can starve the entire system. Within the RT class,
24there are 8 levels of class data that determine exactly how much time this
25process needs the disk for on each service. In the future this might change
26to be more directly mappable to performance, by passing in a wanted data
27rate instead.
28
29IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
30for any process that hasn't set a specific io priority. The class data
31determines how much io bandwidth the process will get, it's directly mappable
32to the cpu nice levels just more coarsely implemented. 0 is the highest
33BE prio level, 7 is the lowest. The mapping between cpu nice level and io
34nice level is determined as: io_nice = (cpu_nice + 20) / 5.
35
36IOPRIO_CLASS_IDLE: This is the idle scheduling class, processes running at this
37level only get io time when no one else needs the disk. The idle class has no
38class data, since it doesn't really apply here.
39
40Tools
41-----
42
43See below for a sample ionice tool. Usage::
44
45	# ionice -c<class> -n<level> -p<pid>
46
47If pid isn't given, the current process is assumed. IO priority settings
48are inherited on fork, so you can use ionice to start the process at a given
49level::
50
51	# ionice -c2 -n0 /bin/ls
52
53will run ls at the best-effort scheduling class at the highest priority.
54For a running process, you can give the pid instead::
55
56	# ionice -c1 -n2 -p100
57
58will change pid 100 to run at the realtime scheduling class, at priority 2.
59
60ionice.c tool::
61
62  #include <stdio.h>
63  #include <stdlib.h>
64  #include <errno.h>
65  #include <getopt.h>
66  #include <unistd.h>
67  #include <sys/ptrace.h>
68  #include <asm/unistd.h>
69
70  extern int sys_ioprio_set(int, int, int);
71  extern int sys_ioprio_get(int, int);
72
73  #if defined(__i386__)
74  #define __NR_ioprio_set		289
75  #define __NR_ioprio_get		290
76  #elif defined(__ppc__)
77  #define __NR_ioprio_set		273
78  #define __NR_ioprio_get		274
79  #elif defined(__x86_64__)
80  #define __NR_ioprio_set		251
81  #define __NR_ioprio_get		252
82  #else
83  #error "Unsupported arch"
84  #endif
85
86  static inline int ioprio_set(int which, int who, int ioprio)
87  {
88	return syscall(__NR_ioprio_set, which, who, ioprio);
89  }
90
91  static inline int ioprio_get(int which, int who)
92  {
93	return syscall(__NR_ioprio_get, which, who);
94  }
95
96  enum {
97	IOPRIO_CLASS_NONE,
98	IOPRIO_CLASS_RT,
99	IOPRIO_CLASS_BE,
100	IOPRIO_CLASS_IDLE,
101  };
102
103  enum {
104	IOPRIO_WHO_PROCESS = 1,
105	IOPRIO_WHO_PGRP,
106	IOPRIO_WHO_USER,
107  };
108
109  #define IOPRIO_CLASS_SHIFT	13
110
111  const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
112
113  int main(int argc, char *argv[])
114  {
115	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
116	int c, pid = 0;
117
118	while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
119		switch (c) {
120		case 'n':
121			ioprio = strtol(optarg, NULL, 10);
122			set = 1;
123			break;
124		case 'c':
125			ioprio_class = strtol(optarg, NULL, 10);
126			set = 1;
127			break;
128		case 'p':
129			pid = strtol(optarg, NULL, 10);
130			break;
131		}
132	}
133
134	switch (ioprio_class) {
135		case IOPRIO_CLASS_NONE:
136			ioprio_class = IOPRIO_CLASS_BE;
137			break;
138		case IOPRIO_CLASS_RT:
139		case IOPRIO_CLASS_BE:
140			break;
141		case IOPRIO_CLASS_IDLE:
142			ioprio = 7;
143			break;
144		default:
145			printf("bad prio class %d\n", ioprio_class);
146			return 1;
147	}
148
149	if (!set) {
150		if (!pid && argv[optind])
151			pid = strtol(argv[optind], NULL, 10);
152
153		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
154
155		printf("pid=%d, %d\n", pid, ioprio);
156
157		if (ioprio == -1)
158			perror("ioprio_get");
159		else {
160			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
161			ioprio = ioprio & 0xff;
162			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
163		}
164	} else {
165		if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
166			perror("ioprio_set");
167			return 1;
168		}
169
170		if (argv[optind])
171			execvp(argv[optind], &argv[optind]);
172	}
173
174	return 0;
175  }
176
177
178March 11 2005, Jens Axboe <jens.axboe@oracle.com>
179