1/* getdelays.c
2 *
3 * Utility to get per-pid and per-tgid delay accounting statistics
4 * Also illustrates usage of the taskstats interface
5 *
6 * Copyright (C) Shailabh Nagar, IBM Corp. 2005
7 * Copyright (C) Balbir Singh, IBM Corp. 2006
8 * Copyright (c) Jay Lan, SGI. 2006
9 *
10 * Compile with
11 *	gcc -I/usr/src/linux/include getdelays.c -o getdelays
12 */
13
14#include <stdio.h>
15#include <stdlib.h>
16#include <errno.h>
17#include <unistd.h>
18#include <poll.h>
19#include <string.h>
20#include <fcntl.h>
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <sys/socket.h>
24#include <sys/types.h>
25#include <signal.h>
26
27#include <linux/genetlink.h>
28#include <linux/taskstats.h>
29
30/*
31 * Generic macros for dealing with netlink sockets. Might be duplicated
32 * elsewhere. It is recommended that commercial grade applications use
33 * libnl or libnetlink and use the interfaces provided by the library
34 */
/* Start of the generic netlink payload: skips the nlmsghdr and genlmsghdr. */
#define GENLMSG_DATA(glh)	((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
/* Number of payload bytes that follow the genlmsghdr in message glh. */
#define GENLMSG_PAYLOAD(glh)	(NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
/* Start of an attribute's payload: skips the nlattr header. */
#define NLA_DATA(na)		((void *)((char *)(na) + NLA_HDRLEN))
/* Payload size of an attribute whose total length (header included) is len. */
#define NLA_PAYLOAD(len)	((len) - NLA_HDRLEN)
39
/*
 * Print a printf-style message on stderr, then terminate the process
 * with exit status `code`.
 */
#define err(code, fmt, ...)					\
	do {							\
		fprintf(stderr, fmt, ##__VA_ARGS__);		\
		exit(code);					\
	} while (0)
45
/* Cleared by main() when an aggregate attribute is seen; not read in this file */
int done;
/* Receive buffer size from -r; when nonzero it is applied as SO_RCVBUF */
int rcvbufsz;
/* Holds the family name that get_family_id() sends to the controller */
char name[100];
/* Set by -v; enables the PRINTF() debug output */
int dbg;
/* Set by -d; print delay accounting stats for each reply */
int print_delays;
/* Set by -i; print IO accounting for each reply */
int print_io_accounting;
/* NOTE(review): not referenced anywhere else in this file */
__u64 stime, utime;
53
/*
 * Debug printf: prints to stdout only when the global dbg flag is set.
 * Wrapped in do { } while (0) so that "PRINTF(...);" behaves as a single
 * statement, including as the body of an if that has an else branch.
 */
#define PRINTF(fmt, arg...)			\
	do {					\
		if (dbg) {			\
			printf(fmt, ##arg);	\
		}				\
	} while (0)
59
/*
 * Maximum size of response requested or message sent; this is the size
 * in bytes of the attribute buffer in struct msgtemplate.
 */
#define MAX_MSG_SIZE	1024
/*
 * Maximum number of cpus expected to be specified in a cpumask; used to
 * size the cpumask string buffer.
 */
#define MAX_CPUS	32
64
/*
 * Buffer layout for the netlink messages this program sends and
 * receives: netlink header, generic netlink header, then up to
 * MAX_MSG_SIZE bytes of attribute data.
 */
struct msgtemplate {
	struct nlmsghdr n;	/* netlink message header */
	struct genlmsghdr g;	/* generic netlink header */
	char buf[MAX_MSG_SIZE];	/* attributes following the headers */
};
70
/*
 * cpumask string taken from the -m option; main() sends it with the
 * register and deregister cpumask commands.
 */
char cpumask[100+6*MAX_CPUS];
72
/*
 * Print the command synopsis and the flag descriptions on stderr.
 * Only the -d, -i, -l and -v flags get a description line; the options
 * that take an argument appear in the synopsis only.
 */
static void usage(void)
{
	static const char *const help_lines[] = {
		"getdelays [-dilv] [-w logfile] [-r bufsize] "
		"[-m cpumask] [-t tgid] [-p pid]\n",
		"  -d: print delayacct stats\n",
		"  -i: print IO accounting (works only with -p)\n",
		"  -l: listen forever\n",
		"  -v: debug on\n",
	};
	size_t idx;

	for (idx = 0; idx < sizeof(help_lines) / sizeof(help_lines[0]); idx++)
		fputs(help_lines[idx], stderr);
}
82
83/*
84 * Create a raw netlink socket and bind
85 */
86static int create_nl_socket(int protocol)
87{
88	int fd;
89	struct sockaddr_nl local;
90
91	fd = socket(AF_NETLINK, SOCK_RAW, protocol);
92	if (fd < 0)
93		return -1;
94
95	if (rcvbufsz)
96		if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
97				&rcvbufsz, sizeof(rcvbufsz)) < 0) {
98			fprintf(stderr, "Unable to set socket rcv buf size "
99					"to %d\n",
100				rcvbufsz);
101			return -1;
102		}
103
104	memset(&local, 0, sizeof(local));
105	local.nl_family = AF_NETLINK;
106
107	if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
108		goto error;
109
110	return fd;
111error:
112	close(fd);
113	return -1;
114}
115
116
117int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
118	     __u8 genl_cmd, __u16 nla_type,
119	     void *nla_data, int nla_len)
120{
121	struct nlattr *na;
122	struct sockaddr_nl nladdr;
123	int r, buflen;
124	char *buf;
125
126	struct msgtemplate msg;
127
128	msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
129	msg.n.nlmsg_type = nlmsg_type;
130	msg.n.nlmsg_flags = NLM_F_REQUEST;
131	msg.n.nlmsg_seq = 0;
132	msg.n.nlmsg_pid = nlmsg_pid;
133	msg.g.cmd = genl_cmd;
134	msg.g.version = 0x1;
135	na = (struct nlattr *) GENLMSG_DATA(&msg);
136	na->nla_type = nla_type;
137	na->nla_len = nla_len + 1 + NLA_HDRLEN;
138	memcpy(NLA_DATA(na), nla_data, nla_len);
139	msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
140
141	buf = (char *) &msg;
142	buflen = msg.n.nlmsg_len ;
143	memset(&nladdr, 0, sizeof(nladdr));
144	nladdr.nl_family = AF_NETLINK;
145	while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
146			   sizeof(nladdr))) < buflen) {
147		if (r > 0) {
148			buf += r;
149			buflen -= r;
150		} else if (errno != EAGAIN)
151			return -1;
152	}
153	return 0;
154}
155
156
157/*
158 * Probe the controller in genetlink to find the family id
159 * for the TASKSTATS family
160 */
161int get_family_id(int sd)
162{
163	struct {
164		struct nlmsghdr n;
165		struct genlmsghdr g;
166		char buf[256];
167	} ans;
168
169	int id, rc;
170	struct nlattr *na;
171	int rep_len;
172
173	strcpy(name, TASKSTATS_GENL_NAME);
174	rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
175			CTRL_ATTR_FAMILY_NAME, (void *)name,
176			strlen(TASKSTATS_GENL_NAME)+1);
177
178	rep_len = recv(sd, &ans, sizeof(ans), 0);
179	if (ans.n.nlmsg_type == NLMSG_ERROR ||
180	    (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
181		return 0;
182
183	na = (struct nlattr *) GENLMSG_DATA(&ans);
184	na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
185	if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
186		id = *(__u16 *) NLA_DATA(na);
187	}
188	return id;
189}
190
/*
 * Print the CPU, IO and MEM (swapin) delay accounting counters of t on
 * stdout as a small table.
 *
 * Each counter is cast to unsigned long long so the argument always
 * matches the %llu conversion, as print_ioacct() does.
 */
void print_delayacct(struct taskstats *t)
{
	printf("\n\nCPU   %15s%15s%15s%15s\n"
	       "      %15llu%15llu%15llu%15llu\n"
	       "IO    %15s%15s\n"
	       "      %15llu%15llu\n"
	       "MEM   %15s%15s\n"
	       "      %15llu%15llu\n\n",
	       "count", "real total", "virtual total", "delay total",
	       (unsigned long long)t->cpu_count,
	       (unsigned long long)t->cpu_run_real_total,
	       (unsigned long long)t->cpu_run_virtual_total,
	       (unsigned long long)t->cpu_delay_total,
	       "count", "delay total",
	       (unsigned long long)t->blkio_count,
	       (unsigned long long)t->blkio_delay_total,
	       "count", "delay total",
	       (unsigned long long)t->swapin_count,
	       (unsigned long long)t->swapin_delay_total);
}
206
/*
 * Print one line on stdout with the command name of t followed by its
 * read, write and cancelled-write byte counters.
 */
void print_ioacct(struct taskstats *t)
{
	const unsigned long long bytes_read = t->read_bytes;
	const unsigned long long bytes_written = t->write_bytes;
	const unsigned long long bytes_cancelled = t->cancelled_write_bytes;

	printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
	       t->ac_comm, bytes_read, bytes_written, bytes_cancelled);
}
215
216int main(int argc, char *argv[])
217{
218	int c, rc, rep_len, aggr_len, len2, cmd_type;
219	__u16 id;
220	__u32 mypid;
221
222	struct nlattr *na;
223	int nl_sd = -1;
224	int len = 0;
225	pid_t tid = 0;
226	pid_t rtid = 0;
227
228	int fd = 0;
229	int count = 0;
230	int write_file = 0;
231	int maskset = 0;
232	char *logfile = NULL;
233	int loop = 0;
234
235	struct msgtemplate msg;
236
237	while (1) {
238		c = getopt(argc, argv, "diw:r:m:t:p:vl");
239		if (c < 0)
240			break;
241
242		switch (c) {
243		case 'd':
244			printf("print delayacct stats ON\n");
245			print_delays = 1;
246			break;
247		case 'i':
248			printf("printing IO accounting\n");
249			print_io_accounting = 1;
250			break;
251		case 'w':
252			logfile = strdup(optarg);
253			printf("write to file %s\n", logfile);
254			write_file = 1;
255			break;
256		case 'r':
257			rcvbufsz = atoi(optarg);
258			printf("receive buf size %d\n", rcvbufsz);
259			if (rcvbufsz < 0)
260				err(1, "Invalid rcv buf size\n");
261			break;
262		case 'm':
263			strncpy(cpumask, optarg, sizeof(cpumask));
264			maskset = 1;
265			printf("cpumask %s maskset %d\n", cpumask, maskset);
266			break;
267		case 't':
268			tid = atoi(optarg);
269			if (!tid)
270				err(1, "Invalid tgid\n");
271			cmd_type = TASKSTATS_CMD_ATTR_TGID;
272			break;
273		case 'p':
274			tid = atoi(optarg);
275			if (!tid)
276				err(1, "Invalid pid\n");
277			cmd_type = TASKSTATS_CMD_ATTR_PID;
278			break;
279		case 'v':
280			printf("debug on\n");
281			dbg = 1;
282			break;
283		case 'l':
284			printf("listen forever\n");
285			loop = 1;
286			break;
287		default:
288			usage();
289			exit(-1);
290		}
291	}
292
293	if (write_file) {
294		fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
295			  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
296		if (fd == -1) {
297			perror("Cannot open output file\n");
298			exit(1);
299		}
300	}
301
302	if ((nl_sd = create_nl_socket(NETLINK_GENERIC)) < 0)
303		err(1, "error creating Netlink socket\n");
304
305
306	mypid = getpid();
307	id = get_family_id(nl_sd);
308	if (!id) {
309		fprintf(stderr, "Error getting family id, errno %d\n", errno);
310		goto err;
311	}
312	PRINTF("family id %d\n", id);
313
314	if (maskset) {
315		rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
316			      TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
317			      &cpumask, strlen(cpumask) + 1);
318		PRINTF("Sent register cpumask, retval %d\n", rc);
319		if (rc < 0) {
320			fprintf(stderr, "error sending register cpumask\n");
321			goto err;
322		}
323	}
324
325	if (tid) {
326		rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
327			      cmd_type, &tid, sizeof(__u32));
328		PRINTF("Sent pid/tgid, retval %d\n", rc);
329		if (rc < 0) {
330			fprintf(stderr, "error sending tid/tgid cmd\n");
331			goto done;
332		}
333	}
334
335	do {
336		int i;
337
338		rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
339		PRINTF("received %d bytes\n", rep_len);
340
341		if (rep_len < 0) {
342			fprintf(stderr, "nonfatal reply error: errno %d\n",
343				errno);
344			continue;
345		}
346		if (msg.n.nlmsg_type == NLMSG_ERROR ||
347		    !NLMSG_OK((&msg.n), rep_len)) {
348			struct nlmsgerr *err = NLMSG_DATA(&msg);
349			fprintf(stderr, "fatal reply error,  errno %d\n",
350				err->error);
351			goto done;
352		}
353
354		PRINTF("nlmsghdr size=%d, nlmsg_len=%d, rep_len=%d\n",
355		       sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
356
357
358		rep_len = GENLMSG_PAYLOAD(&msg.n);
359
360		na = (struct nlattr *) GENLMSG_DATA(&msg);
361		len = 0;
362		i = 0;
363		while (len < rep_len) {
364			len += NLA_ALIGN(na->nla_len);
365			switch (na->nla_type) {
366			case TASKSTATS_TYPE_AGGR_TGID:
367				/* Fall through */
368			case TASKSTATS_TYPE_AGGR_PID:
369				aggr_len = NLA_PAYLOAD(na->nla_len);
370				len2 = 0;
371				/* For nested attributes, na follows */
372				na = (struct nlattr *) NLA_DATA(na);
373				done = 0;
374				while (len2 < aggr_len) {
375					switch (na->nla_type) {
376					case TASKSTATS_TYPE_PID:
377						rtid = *(int *) NLA_DATA(na);
378						if (print_delays)
379							printf("PID\t%d\n", rtid);
380						break;
381					case TASKSTATS_TYPE_TGID:
382						rtid = *(int *) NLA_DATA(na);
383						if (print_delays)
384							printf("TGID\t%d\n", rtid);
385						break;
386					case TASKSTATS_TYPE_STATS:
387						count++;
388						if (print_delays)
389							print_delayacct((struct taskstats *) NLA_DATA(na));
390						if (print_io_accounting)
391							print_ioacct((struct taskstats *) NLA_DATA(na));
392						if (fd) {
393							if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
394								err(1,"write error\n");
395							}
396						}
397						if (!loop)
398							goto done;
399						break;
400					default:
401						fprintf(stderr, "Unknown nested"
402							" nla_type %d\n",
403							na->nla_type);
404						break;
405					}
406					len2 += NLA_ALIGN(na->nla_len);
407					na = (struct nlattr *) ((char *) na + len2);
408				}
409				break;
410
411			default:
412				fprintf(stderr, "Unknown nla_type %d\n",
413					na->nla_type);
414				break;
415			}
416			na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
417		}
418	} while (loop);
419done:
420	if (maskset) {
421		rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
422			      TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
423			      &cpumask, strlen(cpumask) + 1);
424		printf("Sent deregister mask, retval %d\n", rc);
425		if (rc < 0)
426			err(rc, "error sending deregister cpumask\n");
427	}
428err:
429	close(nl_sd);
430	if (fd)
431		close(fd);
432	return 0;
433}
434