1/* 2 * taskstats.c - Export per-task statistics to userland 3 * 4 * Copyright (C) Shailabh Nagar, IBM Corp. 2006 5 * (C) Balbir Singh, IBM Corp. 2006 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 */ 18 19#include <linux/kernel.h> 20#include <linux/taskstats_kern.h> 21#include <linux/tsacct_kern.h> 22#include <linux/delayacct.h> 23#include <linux/tsacct_kern.h> 24#include <linux/cpumask.h> 25#include <linux/percpu.h> 26#include <net/genetlink.h> 27#include <asm/atomic.h> 28 29/* 30 * Maximum length of a cpumask that can be specified in 31 * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute 32 */ 33#define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) 34 35static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; 36static int family_registered; 37struct kmem_cache *taskstats_cache; 38 39static struct genl_family family = { 40 .id = GENL_ID_GENERATE, 41 .name = TASKSTATS_GENL_NAME, 42 .version = TASKSTATS_GENL_VERSION, 43 .maxattr = TASKSTATS_CMD_ATTR_MAX, 44}; 45 46static struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] 47__read_mostly = { 48 [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, 49 [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, 50 [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, 51 [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; 52 53struct listener { 54 struct list_head list; 55 pid_t pid; 56 char valid; 57}; 58 59struct listener_list { 60 struct rw_semaphore sem; 61 struct list_head list; 62}; 63static DEFINE_PER_CPU(struct listener_list, listener_array); 64 65enum actions { 66 REGISTER, 67 DEREGISTER, 68 CPU_DONT_CARE 69}; 70 71static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, 72 size_t size) 73{ 74 struct sk_buff *skb; 75 void *reply; 76 77 /* 78 * If new attributes are added, please revisit this allocation 79 */ 80 skb = genlmsg_new(size, GFP_KERNEL); 81 if (!skb) 82 return -ENOMEM; 83 84 if (!info) { 85 int seq = get_cpu_var(taskstats_seqnum)++; 86 put_cpu_var(taskstats_seqnum); 87 88 reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); 89 } else 90 reply = genlmsg_put_reply(skb, info, &family, 0, cmd); 91 if (reply == NULL) { 92 nlmsg_free(skb); 93 return -EINVAL; 94 } 95 96 *skbp = skb; 97 return 0; 98} 99 100/* 101 * Send taskstats data in @skb to listener with nl_pid @pid 102 */ 103static int send_reply(struct sk_buff *skb, pid_t pid) 104{ 105 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); 106 void *reply = genlmsg_data(genlhdr); 107 int rc; 108 109 rc = genlmsg_end(skb, reply); 110 if (rc < 0) { 111 nlmsg_free(skb); 112 return rc; 113 } 114 115 return genlmsg_unicast(skb, pid); 116} 117 118/* 119 * Send taskstats data in @skb to listeners registered for @cpu's exit data 120 */ 121static void send_cpu_listeners(struct sk_buff *skb, 122 struct listener_list *listeners) 123{ 124 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); 125 struct listener *s, *tmp; 126 struct sk_buff *skb_next, *skb_cur = skb; 127 void *reply = genlmsg_data(genlhdr); 128 int rc, delcount = 0; 129 130 rc = genlmsg_end(skb, reply); 131 if (rc < 0) { 132 nlmsg_free(skb); 133 return; 134 } 135 136 rc = 0; 137 down_read(&listeners->sem); 138 list_for_each_entry(s, &listeners->list, list) { 139 skb_next = NULL; 140 if (!list_is_last(&s->list, &listeners->list)) { 141 skb_next = skb_clone(skb_cur, GFP_KERNEL); 142 if (!skb_next) 143 break; 144 } 145 rc = genlmsg_unicast(skb_cur, s->pid); 146 if (rc == -ECONNREFUSED) { 147 s->valid = 0; 148 delcount++; 149 } 150 skb_cur = skb_next; 151 } 152 up_read(&listeners->sem); 153 154 if (skb_cur) 155 nlmsg_free(skb_cur); 156 157 if (!delcount) 158 return; 159 160 /* Delete invalidated entries */ 161 down_write(&listeners->sem); 162 list_for_each_entry_safe(s, tmp, &listeners->list, list) { 163 if (!s->valid) { 164 list_del(&s->list); 165 kfree(s); 166 } 167 } 168 up_write(&listeners->sem); 169} 170 171static int fill_pid(pid_t pid, struct task_struct *tsk, 172 struct taskstats *stats) 173{ 174 int rc = 0; 175 176 if (!tsk) { 177 rcu_read_lock(); 178 tsk = find_task_by_pid(pid); 179 if (tsk) 180 get_task_struct(tsk); 181 rcu_read_unlock(); 182 if (!tsk) 183 return -ESRCH; 184 } else 185 get_task_struct(tsk); 186 187 memset(stats, 0, sizeof(*stats)); 188 /* 189 * Each accounting subsystem adds calls to its functions to 190 * fill in relevant parts of struct taskstsats as follows 191 * 192 * per-task-foo(stats, tsk); 193 */ 194 195 delayacct_add_tsk(stats, tsk); 196 197 /* fill in basic acct fields */ 198 stats->version = TASKSTATS_VERSION; 199 bacct_add_tsk(stats, tsk); 200 201 /* fill in extended acct fields */ 202 xacct_add_tsk(stats, tsk); 203 204 /* Define err: label here if needed */ 205 put_task_struct(tsk); 206 return rc; 207 208} 209 210static int fill_tgid(pid_t tgid, struct task_struct *first, 211 struct taskstats *stats) 212{ 213 struct task_struct *tsk; 214 unsigned long flags; 215 int rc = -ESRCH; 216 217 /* 218 * Add additional stats from live tasks except zombie thread group 219 * leaders who are already counted with the dead tasks 220 */ 221 rcu_read_lock(); 222 if (!first) 223 first = find_task_by_pid(tgid); 224 225 if (!first || !lock_task_sighand(first, &flags)) 226 goto out; 227 228 if (first->signal->stats) 229 memcpy(stats, first->signal->stats, sizeof(*stats)); 230 else 231 memset(stats, 0, sizeof(*stats)); 232 233 tsk = first; 234 do { 235 if (tsk->exit_state) 236 continue; 237 /* 238 * Accounting subsystem can call its functions here to 239 * fill in relevant parts of struct taskstsats as follows 240 * 241 * per-task-foo(stats, tsk); 242 */ 243 delayacct_add_tsk(stats, tsk); 244 245 } while_each_thread(first, tsk); 246 247 unlock_task_sighand(first, &flags); 248 rc = 0; 249out: 250 rcu_read_unlock(); 251 252 stats->version = TASKSTATS_VERSION; 253 /* 254 * Accounting subsytems can also add calls here to modify 255 * fields of taskstats. 256 */ 257 return rc; 258} 259 260 261static void fill_tgid_exit(struct task_struct *tsk) 262{ 263 unsigned long flags; 264 265 spin_lock_irqsave(&tsk->sighand->siglock, flags); 266 if (!tsk->signal->stats) 267 goto ret; 268 269 /* 270 * Each accounting subsystem calls its functions here to 271 * accumalate its per-task stats for tsk, into the per-tgid structure 272 * 273 * per-task-foo(tsk->signal->stats, tsk); 274 */ 275 delayacct_add_tsk(tsk->signal->stats, tsk); 276ret: 277 spin_unlock_irqrestore(&tsk->sighand->siglock, flags); 278 return; 279} 280 281static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd) 282{ 283 struct listener_list *listeners; 284 struct listener *s, *tmp; 285 unsigned int cpu; 286 cpumask_t mask = *maskp; 287 288 if (!cpus_subset(mask, cpu_possible_map)) 289 return -EINVAL; 290 291 if (isadd == REGISTER) { 292 for_each_cpu_mask(cpu, mask) { 293 s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, 294 cpu_to_node(cpu)); 295 if (!s) 296 goto cleanup; 297 s->pid = pid; 298 INIT_LIST_HEAD(&s->list); 299 s->valid = 1; 300 301 listeners = &per_cpu(listener_array, cpu); 302 down_write(&listeners->sem); 303 list_add(&s->list, &listeners->list); 304 up_write(&listeners->sem); 305 } 306 return 0; 307 } 308 309 /* Deregister or cleanup */ 310cleanup: 311 for_each_cpu_mask(cpu, mask) { 312 listeners = &per_cpu(listener_array, cpu); 313 down_write(&listeners->sem); 314 list_for_each_entry_safe(s, tmp, &listeners->list, list) { 315 if (s->pid == pid) { 316 list_del(&s->list); 317 kfree(s); 318 break; 319 } 320 } 321 up_write(&listeners->sem); 322 } 323 return 0; 324} 325 326static int parse(struct nlattr *na, cpumask_t *mask) 327{ 328 char *data; 329 int len; 330 int ret; 331 332 if (na == NULL) 333 return 1; 334 len = nla_len(na); 335 if (len > TASKSTATS_CPUMASK_MAXLEN) 336 return -E2BIG; 337 if (len < 1) 338 return -EINVAL; 339 data = kmalloc(len, GFP_KERNEL); 340 if (!data) 341 return -ENOMEM; 342 nla_strlcpy(data, na, len); 343 ret = cpulist_parse(data, *mask); 344 kfree(data); 345 return ret; 346} 347 348static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) 349{ 350 struct nlattr *na, *ret; 351 int aggr; 352 353 aggr = (type == TASKSTATS_TYPE_PID) 354 ? TASKSTATS_TYPE_AGGR_PID 355 : TASKSTATS_TYPE_AGGR_TGID; 356 357 na = nla_nest_start(skb, aggr); 358 if (!na) 359 goto err; 360 if (nla_put(skb, type, sizeof(pid), &pid) < 0) 361 goto err; 362 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); 363 if (!ret) 364 goto err; 365 nla_nest_end(skb, na); 366 367 return nla_data(ret); 368err: 369 return NULL; 370} 371 372static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) 373{ 374 int rc = 0; 375 struct sk_buff *rep_skb; 376 struct taskstats *stats; 377 size_t size; 378 cpumask_t mask; 379 380 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask); 381 if (rc < 0) 382 return rc; 383 if (rc == 0) 384 return add_del_listener(info->snd_pid, &mask, REGISTER); 385 386 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask); 387 if (rc < 0) 388 return rc; 389 if (rc == 0) 390 return add_del_listener(info->snd_pid, &mask, DEREGISTER); 391 392 /* 393 * Size includes space for nested attributes 394 */ 395 size = nla_total_size(sizeof(u32)) + 396 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 397 398 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); 399 if (rc < 0) 400 return rc; 401 402 rc = -EINVAL; 403 if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { 404 u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); 405 stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); 406 if (!stats) 407 goto err; 408 409 rc = fill_pid(pid, NULL, stats); 410 if (rc < 0) 411 goto err; 412 } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { 413 u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); 414 stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); 415 if (!stats) 416 goto err; 417 418 rc = fill_tgid(tgid, NULL, stats); 419 if (rc < 0) 420 goto err; 421 } else 422 goto err; 423 424 return send_reply(rep_skb, info->snd_pid); 425err: 426 nlmsg_free(rep_skb); 427 return rc; 428} 429 430static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) 431{ 432 struct signal_struct *sig = tsk->signal; 433 struct taskstats *stats; 434 435 if (sig->stats || thread_group_empty(tsk)) 436 goto ret; 437 438 /* No problem if kmem_cache_zalloc() fails */ 439 stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); 440 441 spin_lock_irq(&tsk->sighand->siglock); 442 if (!sig->stats) { 443 sig->stats = stats; 444 stats = NULL; 445 } 446 spin_unlock_irq(&tsk->sighand->siglock); 447 448 if (stats) 449 kmem_cache_free(taskstats_cache, stats); 450ret: 451 return sig->stats; 452} 453 454/* Send pid data out on exit */ 455void taskstats_exit(struct task_struct *tsk, int group_dead) 456{ 457 int rc; 458 struct listener_list *listeners; 459 struct taskstats *stats; 460 struct sk_buff *rep_skb; 461 size_t size; 462 int is_thread_group; 463 464 if (!family_registered) 465 return; 466 467 /* 468 * Size includes space for nested attributes 469 */ 470 size = nla_total_size(sizeof(u32)) + 471 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 472 473 is_thread_group = !!taskstats_tgid_alloc(tsk); 474 if (is_thread_group) { 475 /* PID + STATS + TGID + STATS */ 476 size = 2 * size; 477 /* fill the tsk->signal->stats structure */ 478 fill_tgid_exit(tsk); 479 } 480 481 listeners = &__raw_get_cpu_var(listener_array); 482 if (list_empty(&listeners->list)) 483 return; 484 485 rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size); 486 if (rc < 0) 487 return; 488 489 stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid); 490 if (!stats) 491 goto err; 492 493 rc = fill_pid(tsk->pid, tsk, stats); 494 if (rc < 0) 495 goto err; 496 497 /* 498 * Doesn't matter if tsk is the leader or the last group member leaving 499 */ 500 if (!is_thread_group || !group_dead) 501 goto send; 502 503 stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid); 504 if (!stats) 505 goto err; 506 507 memcpy(stats, tsk->signal->stats, sizeof(*stats)); 508 509send: 510 send_cpu_listeners(rep_skb, listeners); 511 return; 512err: 513 nlmsg_free(rep_skb); 514} 515 516static struct genl_ops taskstats_ops = { 517 .cmd = TASKSTATS_CMD_GET, 518 .doit = taskstats_user_cmd, 519 .policy = taskstats_cmd_get_policy, 520}; 521 522/* Needed early in initialization */ 523void __init taskstats_init_early(void) 524{ 525 unsigned int i; 526 527 taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC); 528 for_each_possible_cpu(i) { 529 INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); 530 init_rwsem(&(per_cpu(listener_array, i).sem)); 531 } 532} 533 534static int __init taskstats_init(void) 535{ 536 int rc; 537 538 rc = genl_register_family(&family); 539 if (rc) 540 return rc; 541 542 rc = genl_register_ops(&family, &taskstats_ops); 543 if (rc < 0) 544 goto err; 545 546 family_registered = 1; 547 return 0; 548err: 549 genl_unregister_family(&family); 550 return rc; 551} 552 553/* 554 * late initcall ensures initialization of statistics collection 555 * mechanisms precedes initialization of the taskstats interface 556 */ 557late_initcall(taskstats_init); 558