1/* 2 * net/sched/police.c Input police filter. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * J Hadi Salim (action changes) 11 */ 12 13#include <asm/uaccess.h> 14#include <asm/system.h> 15#include <linux/bitops.h> 16#include <linux/module.h> 17#include <linux/types.h> 18#include <linux/kernel.h> 19#include <linux/string.h> 20#include <linux/mm.h> 21#include <linux/socket.h> 22#include <linux/sockios.h> 23#include <linux/in.h> 24#include <linux/errno.h> 25#include <linux/interrupt.h> 26#include <linux/netdevice.h> 27#include <linux/skbuff.h> 28#include <linux/module.h> 29#include <linux/rtnetlink.h> 30#include <linux/init.h> 31#include <net/sock.h> 32#include <net/act_api.h> 33#include <net/netlink.h> 34 35#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) 36#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) 37 38#define POL_TAB_MASK 15 39static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; 40static u32 police_idx_gen; 41static DEFINE_RWLOCK(police_lock); 42 43static struct tcf_hashinfo police_hash_info = { 44 .htab = tcf_police_ht, 45 .hmask = POL_TAB_MASK, 46 .lock = &police_lock, 47}; 48 49/* old policer structure from before tc actions */ 50struct tc_police_compat 51{ 52 u32 index; 53 int action; 54 u32 limit; 55 u32 burst; 56 u32 mtu; 57 struct tc_ratespec rate; 58 struct tc_ratespec peakrate; 59}; 60 61/* Each policer is serialized by its individual spinlock */ 62 63#ifdef CONFIG_NET_CLS_ACT 64static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, 65 int type, struct tc_action *a) 66{ 67 struct tcf_common *p; 68 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 69 struct rtattr *r; 70 71 read_lock(&police_lock); 72 73 s_i = cb->args[0]; 74 75 for (i = 0; i < (POL_TAB_MASK + 1); i++) { 76 p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)]; 77 78 for (; p; p = p->tcfc_next) { 79 index++; 80 if (index < s_i) 81 continue; 82 a->priv = p; 83 a->order = index; 84 r = (struct rtattr *)skb_tail_pointer(skb); 85 RTA_PUT(skb, a->order, 0, NULL); 86 if (type == RTM_DELACTION) 87 err = tcf_action_dump_1(skb, a, 0, 1); 88 else 89 err = tcf_action_dump_1(skb, a, 0, 0); 90 if (err < 0) { 91 index--; 92 nlmsg_trim(skb, r); 93 goto done; 94 } 95 r->rta_len = skb_tail_pointer(skb) - (u8 *)r; 96 n_i++; 97 } 98 } 99done: 100 read_unlock(&police_lock); 101 if (n_i) 102 cb->args[0] += n_i; 103 return n_i; 104 105rtattr_failure: 106 nlmsg_trim(skb, r); 107 goto done; 108} 109#endif 110 111void tcf_police_destroy(struct tcf_police *p) 112{ 113 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); 114 struct tcf_common **p1p; 115 116 for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) { 117 if (*p1p == &p->common) { 118 write_lock_bh(&police_lock); 119 *p1p = p->tcf_next; 120 write_unlock_bh(&police_lock); 121#ifdef CONFIG_NET_ESTIMATOR 122 gen_kill_estimator(&p->tcf_bstats, 123 &p->tcf_rate_est); 124#endif 125 if (p->tcfp_R_tab) 126 qdisc_put_rtab(p->tcfp_R_tab); 127 if (p->tcfp_P_tab) 128 qdisc_put_rtab(p->tcfp_P_tab); 129 kfree(p); 130 return; 131 } 132 } 133 BUG_TRAP(0); 134} 135 136#ifdef CONFIG_NET_CLS_ACT 137static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, 138 struct tc_action *a, int ovr, int bind) 139{ 140 unsigned h; 141 int ret = 0, err; 142 struct rtattr *tb[TCA_POLICE_MAX]; 143 struct tc_police *parm; 144 struct tcf_police *police; 145 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; 146 int size; 147 148 if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) 149 return -EINVAL; 150 151 if (tb[TCA_POLICE_TBF-1] == NULL) 152 return -EINVAL; 153 size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]); 154 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) 155 return -EINVAL; 156 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); 157 158 if (tb[TCA_POLICE_RESULT-1] != NULL && 159 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 160 return -EINVAL; 161 if (tb[TCA_POLICE_RESULT-1] != NULL && 162 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 163 return -EINVAL; 164 165 if (parm->index) { 166 struct tcf_common *pc; 167 168 pc = tcf_hash_lookup(parm->index, &police_hash_info); 169 if (pc != NULL) { 170 a->priv = pc; 171 police = to_police(pc); 172 if (bind) { 173 police->tcf_bindcnt += 1; 174 police->tcf_refcnt += 1; 175 } 176 if (ovr) 177 goto override; 178 return ret; 179 } 180 } 181 182 police = kzalloc(sizeof(*police), GFP_KERNEL); 183 if (police == NULL) 184 return -ENOMEM; 185 ret = ACT_P_CREATED; 186 police->tcf_refcnt = 1; 187 spin_lock_init(&police->tcf_lock); 188 police->tcf_stats_lock = &police->tcf_lock; 189 if (bind) 190 police->tcf_bindcnt = 1; 191override: 192 if (parm->rate.rate) { 193 err = -ENOMEM; 194 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); 195 if (R_tab == NULL) 196 goto failure; 197 if (parm->peakrate.rate) { 198 P_tab = qdisc_get_rtab(&parm->peakrate, 199 tb[TCA_POLICE_PEAKRATE-1]); 200 if (P_tab == NULL) { 201 qdisc_put_rtab(R_tab); 202 goto failure; 203 } 204 } 205 } 206 /* No failure allowed after this point */ 207 spin_lock_bh(&police->tcf_lock); 208 if (R_tab != NULL) { 209 qdisc_put_rtab(police->tcfp_R_tab); 210 police->tcfp_R_tab = R_tab; 211 } 212 if (P_tab != NULL) { 213 qdisc_put_rtab(police->tcfp_P_tab); 214 police->tcfp_P_tab = P_tab; 215 } 216 217 if (tb[TCA_POLICE_RESULT-1]) 218 police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 219 police->tcfp_toks = police->tcfp_burst = parm->burst; 220 police->tcfp_mtu = parm->mtu; 221 if (police->tcfp_mtu == 0) { 222 police->tcfp_mtu = ~0; 223 if (police->tcfp_R_tab) 224 police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; 225 } 226 if (police->tcfp_P_tab) 227 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); 228 police->tcf_action = parm->action; 229 230#ifdef CONFIG_NET_ESTIMATOR 231 if (tb[TCA_POLICE_AVRATE-1]) 232 police->tcfp_ewma_rate = 233 *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); 234 if (est) 235 gen_replace_estimator(&police->tcf_bstats, 236 &police->tcf_rate_est, 237 police->tcf_stats_lock, est); 238#endif 239 240 spin_unlock_bh(&police->tcf_lock); 241 if (ret != ACT_P_CREATED) 242 return ret; 243 244 police->tcfp_t_c = psched_get_time(); 245 police->tcf_index = parm->index ? parm->index : 246 tcf_hash_new_index(&police_idx_gen, &police_hash_info); 247 h = tcf_hash(police->tcf_index, POL_TAB_MASK); 248 write_lock_bh(&police_lock); 249 police->tcf_next = tcf_police_ht[h]; 250 tcf_police_ht[h] = &police->common; 251 write_unlock_bh(&police_lock); 252 253 a->priv = police; 254 return ret; 255 256failure: 257 if (ret == ACT_P_CREATED) 258 kfree(police); 259 return err; 260} 261 262static int tcf_act_police_cleanup(struct tc_action *a, int bind) 263{ 264 struct tcf_police *p = a->priv; 265 266 if (p != NULL) 267 return tcf_police_release(p, bind); 268 return 0; 269} 270 271static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, 272 struct tcf_result *res) 273{ 274 struct tcf_police *police = a->priv; 275 psched_time_t now; 276 long toks; 277 long ptoks = 0; 278 279 spin_lock(&police->tcf_lock); 280 281 police->tcf_bstats.bytes += skb->len; 282 police->tcf_bstats.packets++; 283 284#ifdef CONFIG_NET_ESTIMATOR 285 if (police->tcfp_ewma_rate && 286 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { 287 police->tcf_qstats.overlimits++; 288 spin_unlock(&police->tcf_lock); 289 return police->tcf_action; 290 } 291#endif 292 293 if (skb->len <= police->tcfp_mtu) { 294 if (police->tcfp_R_tab == NULL) { 295 spin_unlock(&police->tcf_lock); 296 return police->tcfp_result; 297 } 298 299 now = psched_get_time(); 300 toks = psched_tdiff_bounded(now, police->tcfp_t_c, 301 police->tcfp_burst); 302 if (police->tcfp_P_tab) { 303 ptoks = toks + police->tcfp_ptoks; 304 if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) 305 ptoks = (long)L2T_P(police, police->tcfp_mtu); 306 ptoks -= L2T_P(police, skb->len); 307 } 308 toks += police->tcfp_toks; 309 if (toks > (long)police->tcfp_burst) 310 toks = police->tcfp_burst; 311 toks -= L2T(police, skb->len); 312 if ((toks|ptoks) >= 0) { 313 police->tcfp_t_c = now; 314 police->tcfp_toks = toks; 315 police->tcfp_ptoks = ptoks; 316 spin_unlock(&police->tcf_lock); 317 return police->tcfp_result; 318 } 319 } 320 321 police->tcf_qstats.overlimits++; 322 spin_unlock(&police->tcf_lock); 323 return police->tcf_action; 324} 325 326static int 327tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) 328{ 329 unsigned char *b = skb_tail_pointer(skb); 330 struct tcf_police *police = a->priv; 331 struct tc_police opt; 332 333 opt.index = police->tcf_index; 334 opt.action = police->tcf_action; 335 opt.mtu = police->tcfp_mtu; 336 opt.burst = police->tcfp_burst; 337 opt.refcnt = police->tcf_refcnt - ref; 338 opt.bindcnt = police->tcf_bindcnt - bind; 339 if (police->tcfp_R_tab) 340 opt.rate = police->tcfp_R_tab->rate; 341 else 342 memset(&opt.rate, 0, sizeof(opt.rate)); 343 if (police->tcfp_P_tab) 344 opt.peakrate = police->tcfp_P_tab->rate; 345 else 346 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 347 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 348 if (police->tcfp_result) 349 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), 350 &police->tcfp_result); 351#ifdef CONFIG_NET_ESTIMATOR 352 if (police->tcfp_ewma_rate) 353 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); 354#endif 355 return skb->len; 356 357rtattr_failure: 358 nlmsg_trim(skb, b); 359 return -1; 360} 361 362MODULE_AUTHOR("Alexey Kuznetsov"); 363MODULE_DESCRIPTION("Policing actions"); 364MODULE_LICENSE("GPL"); 365 366static struct tc_action_ops act_police_ops = { 367 .kind = "police", 368 .hinfo = &police_hash_info, 369 .type = TCA_ID_POLICE, 370 .capab = TCA_CAP_NONE, 371 .owner = THIS_MODULE, 372 .act = tcf_act_police, 373 .dump = tcf_act_police_dump, 374 .cleanup = tcf_act_police_cleanup, 375 .lookup = tcf_hash_search, 376 .init = tcf_act_police_locate, 377 .walk = tcf_act_police_walker 378}; 379 380static int __init 381police_init_module(void) 382{ 383 return tcf_register_action(&act_police_ops); 384} 385 386static void __exit 387police_cleanup_module(void) 388{ 389 tcf_unregister_action(&act_police_ops); 390} 391 392module_init(police_init_module); 393module_exit(police_cleanup_module); 394 395#else /* CONFIG_NET_CLS_ACT */ 396 397static struct tcf_common *tcf_police_lookup(u32 index) 398{ 399 struct tcf_hashinfo *hinfo = &police_hash_info; 400 struct tcf_common *p; 401 402 read_lock(hinfo->lock); 403 for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; 404 p = p->tcfc_next) { 405 if (p->tcfc_index == index) 406 break; 407 } 408 read_unlock(hinfo->lock); 409 410 return p; 411} 412 413static u32 tcf_police_new_index(void) 414{ 415 u32 *idx_gen = &police_idx_gen; 416 u32 val = *idx_gen; 417 418 do { 419 if (++val == 0) 420 val = 1; 421 } while (tcf_police_lookup(val)); 422 423 return (*idx_gen = val); 424} 425 426struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) 427{ 428 unsigned int h; 429 struct tcf_police *police; 430 struct rtattr *tb[TCA_POLICE_MAX]; 431 struct tc_police *parm; 432 int size; 433 434 if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) 435 return NULL; 436 437 if (tb[TCA_POLICE_TBF-1] == NULL) 438 return NULL; 439 size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]); 440 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) 441 return NULL; 442 443 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); 444 445 if (parm->index) { 446 struct tcf_common *pc; 447 448 pc = tcf_police_lookup(parm->index); 449 if (pc) { 450 police = to_police(pc); 451 police->tcf_refcnt++; 452 return police; 453 } 454 } 455 police = kzalloc(sizeof(*police), GFP_KERNEL); 456 if (unlikely(!police)) 457 return NULL; 458 459 police->tcf_refcnt = 1; 460 spin_lock_init(&police->tcf_lock); 461 police->tcf_stats_lock = &police->tcf_lock; 462 if (parm->rate.rate) { 463 police->tcfp_R_tab = 464 qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); 465 if (police->tcfp_R_tab == NULL) 466 goto failure; 467 if (parm->peakrate.rate) { 468 police->tcfp_P_tab = 469 qdisc_get_rtab(&parm->peakrate, 470 tb[TCA_POLICE_PEAKRATE-1]); 471 if (police->tcfp_P_tab == NULL) 472 goto failure; 473 } 474 } 475 if (tb[TCA_POLICE_RESULT-1]) { 476 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) 477 goto failure; 478 police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); 479 } 480#ifdef CONFIG_NET_ESTIMATOR 481 if (tb[TCA_POLICE_AVRATE-1]) { 482 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) 483 goto failure; 484 police->tcfp_ewma_rate = 485 *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); 486 } 487#endif 488 police->tcfp_toks = police->tcfp_burst = parm->burst; 489 police->tcfp_mtu = parm->mtu; 490 if (police->tcfp_mtu == 0) { 491 police->tcfp_mtu = ~0; 492 if (police->tcfp_R_tab) 493 police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; 494 } 495 if (police->tcfp_P_tab) 496 police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); 497 police->tcfp_t_c = psched_get_time(); 498 police->tcf_index = parm->index ? parm->index : 499 tcf_police_new_index(); 500 police->tcf_action = parm->action; 501#ifdef CONFIG_NET_ESTIMATOR 502 if (est) 503 gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, 504 police->tcf_stats_lock, est); 505#endif 506 h = tcf_hash(police->tcf_index, POL_TAB_MASK); 507 write_lock_bh(&police_lock); 508 police->tcf_next = tcf_police_ht[h]; 509 tcf_police_ht[h] = &police->common; 510 write_unlock_bh(&police_lock); 511 return police; 512 513failure: 514 if (police->tcfp_R_tab) 515 qdisc_put_rtab(police->tcfp_R_tab); 516 kfree(police); 517 return NULL; 518} 519 520int tcf_police(struct sk_buff *skb, struct tcf_police *police) 521{ 522 psched_time_t now; 523 long toks; 524 long ptoks = 0; 525 526 spin_lock(&police->tcf_lock); 527 528 police->tcf_bstats.bytes += skb->len; 529 police->tcf_bstats.packets++; 530 531#ifdef CONFIG_NET_ESTIMATOR 532 if (police->tcfp_ewma_rate && 533 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { 534 police->tcf_qstats.overlimits++; 535 spin_unlock(&police->tcf_lock); 536 return police->tcf_action; 537 } 538#endif 539 if (skb->len <= police->tcfp_mtu) { 540 if (police->tcfp_R_tab == NULL) { 541 spin_unlock(&police->tcf_lock); 542 return police->tcfp_result; 543 } 544 545 now = psched_get_time(); 546 toks = psched_tdiff_bounded(now, police->tcfp_t_c, 547 police->tcfp_burst); 548 if (police->tcfp_P_tab) { 549 ptoks = toks + police->tcfp_ptoks; 550 if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) 551 ptoks = (long)L2T_P(police, police->tcfp_mtu); 552 ptoks -= L2T_P(police, skb->len); 553 } 554 toks += police->tcfp_toks; 555 if (toks > (long)police->tcfp_burst) 556 toks = police->tcfp_burst; 557 toks -= L2T(police, skb->len); 558 if ((toks|ptoks) >= 0) { 559 police->tcfp_t_c = now; 560 police->tcfp_toks = toks; 561 police->tcfp_ptoks = ptoks; 562 spin_unlock(&police->tcf_lock); 563 return police->tcfp_result; 564 } 565 } 566 567 police->tcf_qstats.overlimits++; 568 spin_unlock(&police->tcf_lock); 569 return police->tcf_action; 570} 571EXPORT_SYMBOL(tcf_police); 572 573int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) 574{ 575 unsigned char *b = skb_tail_pointer(skb); 576 struct tc_police opt; 577 578 opt.index = police->tcf_index; 579 opt.action = police->tcf_action; 580 opt.mtu = police->tcfp_mtu; 581 opt.burst = police->tcfp_burst; 582 if (police->tcfp_R_tab) 583 opt.rate = police->tcfp_R_tab->rate; 584 else 585 memset(&opt.rate, 0, sizeof(opt.rate)); 586 if (police->tcfp_P_tab) 587 opt.peakrate = police->tcfp_P_tab->rate; 588 else 589 memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 590 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); 591 if (police->tcfp_result) 592 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), 593 &police->tcfp_result); 594#ifdef CONFIG_NET_ESTIMATOR 595 if (police->tcfp_ewma_rate) 596 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); 597#endif 598 return skb->len; 599 600rtattr_failure: 601 nlmsg_trim(skb, b); 602 return -1; 603} 604 605int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) 606{ 607 struct gnet_dump d; 608 609 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, 610 TCA_XSTATS, police->tcf_stats_lock, 611 &d) < 0) 612 goto errout; 613 614 if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || 615#ifdef CONFIG_NET_ESTIMATOR 616 gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || 617#endif 618 gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) 619 goto errout; 620 621 if (gnet_stats_finish_copy(&d) < 0) 622 goto errout; 623 624 return 0; 625 626errout: 627 return -1; 628} 629 630#endif /* CONFIG_NET_CLS_ACT */ 631