1/* 2 * Linux Socket Filter - Kernel level socket filtering 3 * 4 * Author: 5 * Jay Schulist <jschlst@samba.org> 6 * 7 * Based on the design of: 8 * - The Berkeley Packet Filter 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 * 15 * Andi Kleen - Fix a few bad bugs and races. 16 * Kris Katterjohn - Added many additional checks in sk_chk_filter() 17 */ 18 19#include <linux/module.h> 20#include <linux/types.h> 21#include <linux/mm.h> 22#include <linux/fcntl.h> 23#include <linux/socket.h> 24#include <linux/in.h> 25#include <linux/inet.h> 26#include <linux/netdevice.h> 27#include <linux/if_packet.h> 28#include <linux/gfp.h> 29#include <net/ip.h> 30#include <net/protocol.h> 31#include <net/netlink.h> 32#include <linux/skbuff.h> 33#include <net/sock.h> 34#include <linux/errno.h> 35#include <linux/timer.h> 36#include <asm/system.h> 37#include <asm/uaccess.h> 38#include <asm/unaligned.h> 39#include <linux/filter.h> 40 41/* No hurry in this branch */ 42static void *__load_pointer(struct sk_buff *skb, int k) 43{ 44 u8 *ptr = NULL; 45 46 if (k >= SKF_NET_OFF) 47 ptr = skb_network_header(skb) + k - SKF_NET_OFF; 48 else if (k >= SKF_LL_OFF) 49 ptr = skb_mac_header(skb) + k - SKF_LL_OFF; 50 51 if (ptr >= skb->head && ptr < skb_tail_pointer(skb)) 52 return ptr; 53 return NULL; 54} 55 56static inline void *load_pointer(struct sk_buff *skb, int k, 57 unsigned int size, void *buffer) 58{ 59 if (k >= 0) 60 return skb_header_pointer(skb, k, size, buffer); 61 else { 62 if (k >= SKF_AD_OFF) 63 return NULL; 64 return __load_pointer(skb, k); 65 } 66} 67 68/** 69 * sk_filter - run a packet through a socket filter 70 * @sk: sock associated with &sk_buff 71 * @skb: buffer to filter 72 * 73 * Run the filter code and then cut skb->data to correct size returned by 74 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller 75 * than pkt_len we keep whole skb->data. This is the socket level 76 * wrapper to sk_run_filter. It returns 0 if the packet should 77 * be accepted or -EPERM if the packet should be tossed. 78 * 79 */ 80int sk_filter(struct sock *sk, struct sk_buff *skb) 81{ 82 int err; 83 struct sk_filter *filter; 84 85 err = security_sock_rcv_skb(sk, skb); 86 if (err) 87 return err; 88 89 rcu_read_lock_bh(); 90 filter = rcu_dereference_bh(sk->sk_filter); 91 if (filter) { 92 unsigned int pkt_len = sk_run_filter(skb, filter->insns, 93 filter->len); 94 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; 95 } 96 rcu_read_unlock_bh(); 97 98 return err; 99} 100EXPORT_SYMBOL(sk_filter); 101 102/** 103 * sk_run_filter - run a filter on a socket 104 * @skb: buffer to run the filter on 105 * @filter: filter to apply 106 * @flen: length of filter 107 * 108 * Decode and apply filter instructions to the skb->data. 109 * Return length to keep, 0 for none. skb is the data we are 110 * filtering, filter is the array of filter instructions, and 111 * len is the number of filter blocks in the array. 112 */ 113unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) 114{ 115 void *ptr; 116 u32 A = 0; /* Accumulator */ 117 u32 X = 0; /* Index Register */ 118 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ 119 unsigned long memvalid = 0; 120 u32 tmp; 121 int k; 122 int pc; 123 124 BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG); 125 /* 126 * Process array of filter instructions. 127 */ 128 for (pc = 0; pc < flen; pc++) { 129 const struct sock_filter *fentry = &filter[pc]; 130 u32 f_k = fentry->k; 131 132 switch (fentry->code) { 133 case BPF_S_ALU_ADD_X: 134 A += X; 135 continue; 136 case BPF_S_ALU_ADD_K: 137 A += f_k; 138 continue; 139 case BPF_S_ALU_SUB_X: 140 A -= X; 141 continue; 142 case BPF_S_ALU_SUB_K: 143 A -= f_k; 144 continue; 145 case BPF_S_ALU_MUL_X: 146 A *= X; 147 continue; 148 case BPF_S_ALU_MUL_K: 149 A *= f_k; 150 continue; 151 case BPF_S_ALU_DIV_X: 152 if (X == 0) 153 return 0; 154 A /= X; 155 continue; 156 case BPF_S_ALU_DIV_K: 157 A /= f_k; 158 continue; 159 case BPF_S_ALU_AND_X: 160 A &= X; 161 continue; 162 case BPF_S_ALU_AND_K: 163 A &= f_k; 164 continue; 165 case BPF_S_ALU_OR_X: 166 A |= X; 167 continue; 168 case BPF_S_ALU_OR_K: 169 A |= f_k; 170 continue; 171 case BPF_S_ALU_LSH_X: 172 A <<= X; 173 continue; 174 case BPF_S_ALU_LSH_K: 175 A <<= f_k; 176 continue; 177 case BPF_S_ALU_RSH_X: 178 A >>= X; 179 continue; 180 case BPF_S_ALU_RSH_K: 181 A >>= f_k; 182 continue; 183 case BPF_S_ALU_NEG: 184 A = -A; 185 continue; 186 case BPF_S_JMP_JA: 187 pc += f_k; 188 continue; 189 case BPF_S_JMP_JGT_K: 190 pc += (A > f_k) ? fentry->jt : fentry->jf; 191 continue; 192 case BPF_S_JMP_JGE_K: 193 pc += (A >= f_k) ? fentry->jt : fentry->jf; 194 continue; 195 case BPF_S_JMP_JEQ_K: 196 pc += (A == f_k) ? fentry->jt : fentry->jf; 197 continue; 198 case BPF_S_JMP_JSET_K: 199 pc += (A & f_k) ? fentry->jt : fentry->jf; 200 continue; 201 case BPF_S_JMP_JGT_X: 202 pc += (A > X) ? fentry->jt : fentry->jf; 203 continue; 204 case BPF_S_JMP_JGE_X: 205 pc += (A >= X) ? fentry->jt : fentry->jf; 206 continue; 207 case BPF_S_JMP_JEQ_X: 208 pc += (A == X) ? fentry->jt : fentry->jf; 209 continue; 210 case BPF_S_JMP_JSET_X: 211 pc += (A & X) ? fentry->jt : fentry->jf; 212 continue; 213 case BPF_S_LD_W_ABS: 214 k = f_k; 215load_w: 216 ptr = load_pointer(skb, k, 4, &tmp); 217 if (ptr != NULL) { 218 A = get_unaligned_be32(ptr); 219 continue; 220 } 221 break; 222 case BPF_S_LD_H_ABS: 223 k = f_k; 224load_h: 225 ptr = load_pointer(skb, k, 2, &tmp); 226 if (ptr != NULL) { 227 A = get_unaligned_be16(ptr); 228 continue; 229 } 230 break; 231 case BPF_S_LD_B_ABS: 232 k = f_k; 233load_b: 234 ptr = load_pointer(skb, k, 1, &tmp); 235 if (ptr != NULL) { 236 A = *(u8 *)ptr; 237 continue; 238 } 239 break; 240 case BPF_S_LD_W_LEN: 241 A = skb->len; 242 continue; 243 case BPF_S_LDX_W_LEN: 244 X = skb->len; 245 continue; 246 case BPF_S_LD_W_IND: 247 k = X + f_k; 248 goto load_w; 249 case BPF_S_LD_H_IND: 250 k = X + f_k; 251 goto load_h; 252 case BPF_S_LD_B_IND: 253 k = X + f_k; 254 goto load_b; 255 case BPF_S_LDX_B_MSH: 256 ptr = load_pointer(skb, f_k, 1, &tmp); 257 if (ptr != NULL) { 258 X = (*(u8 *)ptr & 0xf) << 2; 259 continue; 260 } 261 return 0; 262 case BPF_S_LD_IMM: 263 A = f_k; 264 continue; 265 case BPF_S_LDX_IMM: 266 X = f_k; 267 continue; 268 case BPF_S_LD_MEM: 269 A = (memvalid & (1UL << f_k)) ? 270 mem[f_k] : 0; 271 continue; 272 case BPF_S_LDX_MEM: 273 X = (memvalid & (1UL << f_k)) ? 274 mem[f_k] : 0; 275 continue; 276 case BPF_S_MISC_TAX: 277 X = A; 278 continue; 279 case BPF_S_MISC_TXA: 280 A = X; 281 continue; 282 case BPF_S_RET_K: 283 return f_k; 284 case BPF_S_RET_A: 285 return A; 286 case BPF_S_ST: 287 memvalid |= 1UL << f_k; 288 mem[f_k] = A; 289 continue; 290 case BPF_S_STX: 291 memvalid |= 1UL << f_k; 292 mem[f_k] = X; 293 continue; 294 default: 295 WARN_ON(1); 296 return 0; 297 } 298 299 /* 300 * Handle ancillary data, which are impossible 301 * (or very difficult) to get parsing packet contents. 302 */ 303 switch (k-SKF_AD_OFF) { 304 case SKF_AD_PROTOCOL: 305 A = ntohs(skb->protocol); 306 continue; 307 case SKF_AD_PKTTYPE: 308 A = skb->pkt_type; 309 continue; 310 case SKF_AD_IFINDEX: 311 if (!skb->dev) 312 return 0; 313 A = skb->dev->ifindex; 314 continue; 315 case SKF_AD_MARK: 316 A = skb->mark; 317 continue; 318 case SKF_AD_QUEUE: 319 A = skb->queue_mapping; 320 continue; 321 case SKF_AD_HATYPE: 322 if (!skb->dev) 323 return 0; 324 A = skb->dev->type; 325 continue; 326 case SKF_AD_NLATTR: { 327 struct nlattr *nla; 328 329 if (skb_is_nonlinear(skb)) 330 return 0; 331 if (A > skb->len - sizeof(struct nlattr)) 332 return 0; 333 334 nla = nla_find((struct nlattr *)&skb->data[A], 335 skb->len - A, X); 336 if (nla) 337 A = (void *)nla - (void *)skb->data; 338 else 339 A = 0; 340 continue; 341 } 342 case SKF_AD_NLATTR_NEST: { 343 struct nlattr *nla; 344 345 if (skb_is_nonlinear(skb)) 346 return 0; 347 if (A > skb->len - sizeof(struct nlattr)) 348 return 0; 349 350 nla = (struct nlattr *)&skb->data[A]; 351 if (nla->nla_len > A - skb->len) 352 return 0; 353 354 nla = nla_find_nested(nla, X); 355 if (nla) 356 A = (void *)nla - (void *)skb->data; 357 else 358 A = 0; 359 continue; 360 } 361 default: 362 return 0; 363 } 364 } 365 366 return 0; 367} 368EXPORT_SYMBOL(sk_run_filter); 369 370/** 371 * sk_chk_filter - verify socket filter code 372 * @filter: filter to verify 373 * @flen: length of filter 374 * 375 * Check the user's filter code. If we let some ugly 376 * filter code slip through kaboom! The filter must contain 377 * no references or jumps that are out of range, no illegal 378 * instructions, and must end with a RET instruction. 379 * 380 * All jumps are forward as they are not signed. 381 * 382 * Returns 0 if the rule set is legal or -EINVAL if not. 383 */ 384int sk_chk_filter(struct sock_filter *filter, int flen) 385{ 386 struct sock_filter *ftest; 387 int pc; 388 389 if (flen == 0 || flen > BPF_MAXINSNS) 390 return -EINVAL; 391 392 /* check the filter code now */ 393 for (pc = 0; pc < flen; pc++) { 394 ftest = &filter[pc]; 395 396 /* Only allow valid instructions */ 397 switch (ftest->code) { 398 case BPF_ALU|BPF_ADD|BPF_K: 399 ftest->code = BPF_S_ALU_ADD_K; 400 break; 401 case BPF_ALU|BPF_ADD|BPF_X: 402 ftest->code = BPF_S_ALU_ADD_X; 403 break; 404 case BPF_ALU|BPF_SUB|BPF_K: 405 ftest->code = BPF_S_ALU_SUB_K; 406 break; 407 case BPF_ALU|BPF_SUB|BPF_X: 408 ftest->code = BPF_S_ALU_SUB_X; 409 break; 410 case BPF_ALU|BPF_MUL|BPF_K: 411 ftest->code = BPF_S_ALU_MUL_K; 412 break; 413 case BPF_ALU|BPF_MUL|BPF_X: 414 ftest->code = BPF_S_ALU_MUL_X; 415 break; 416 case BPF_ALU|BPF_DIV|BPF_X: 417 ftest->code = BPF_S_ALU_DIV_X; 418 break; 419 case BPF_ALU|BPF_AND|BPF_K: 420 ftest->code = BPF_S_ALU_AND_K; 421 break; 422 case BPF_ALU|BPF_AND|BPF_X: 423 ftest->code = BPF_S_ALU_AND_X; 424 break; 425 case BPF_ALU|BPF_OR|BPF_K: 426 ftest->code = BPF_S_ALU_OR_K; 427 break; 428 case BPF_ALU|BPF_OR|BPF_X: 429 ftest->code = BPF_S_ALU_OR_X; 430 break; 431 case BPF_ALU|BPF_LSH|BPF_K: 432 ftest->code = BPF_S_ALU_LSH_K; 433 break; 434 case BPF_ALU|BPF_LSH|BPF_X: 435 ftest->code = BPF_S_ALU_LSH_X; 436 break; 437 case BPF_ALU|BPF_RSH|BPF_K: 438 ftest->code = BPF_S_ALU_RSH_K; 439 break; 440 case BPF_ALU|BPF_RSH|BPF_X: 441 ftest->code = BPF_S_ALU_RSH_X; 442 break; 443 case BPF_ALU|BPF_NEG: 444 ftest->code = BPF_S_ALU_NEG; 445 break; 446 case BPF_LD|BPF_W|BPF_ABS: 447 ftest->code = BPF_S_LD_W_ABS; 448 break; 449 case BPF_LD|BPF_H|BPF_ABS: 450 ftest->code = BPF_S_LD_H_ABS; 451 break; 452 case BPF_LD|BPF_B|BPF_ABS: 453 ftest->code = BPF_S_LD_B_ABS; 454 break; 455 case BPF_LD|BPF_W|BPF_LEN: 456 ftest->code = BPF_S_LD_W_LEN; 457 break; 458 case BPF_LD|BPF_W|BPF_IND: 459 ftest->code = BPF_S_LD_W_IND; 460 break; 461 case BPF_LD|BPF_H|BPF_IND: 462 ftest->code = BPF_S_LD_H_IND; 463 break; 464 case BPF_LD|BPF_B|BPF_IND: 465 ftest->code = BPF_S_LD_B_IND; 466 break; 467 case BPF_LD|BPF_IMM: 468 ftest->code = BPF_S_LD_IMM; 469 break; 470 case BPF_LDX|BPF_W|BPF_LEN: 471 ftest->code = BPF_S_LDX_W_LEN; 472 break; 473 case BPF_LDX|BPF_B|BPF_MSH: 474 ftest->code = BPF_S_LDX_B_MSH; 475 break; 476 case BPF_LDX|BPF_IMM: 477 ftest->code = BPF_S_LDX_IMM; 478 break; 479 case BPF_MISC|BPF_TAX: 480 ftest->code = BPF_S_MISC_TAX; 481 break; 482 case BPF_MISC|BPF_TXA: 483 ftest->code = BPF_S_MISC_TXA; 484 break; 485 case BPF_RET|BPF_K: 486 ftest->code = BPF_S_RET_K; 487 break; 488 case BPF_RET|BPF_A: 489 ftest->code = BPF_S_RET_A; 490 break; 491 492 /* Some instructions need special checks */ 493 494 /* check for division by zero */ 495 case BPF_ALU|BPF_DIV|BPF_K: 496 if (ftest->k == 0) 497 return -EINVAL; 498 ftest->code = BPF_S_ALU_DIV_K; 499 break; 500 501 /* check for invalid memory addresses */ 502 case BPF_LD|BPF_MEM: 503 if (ftest->k >= BPF_MEMWORDS) 504 return -EINVAL; 505 ftest->code = BPF_S_LD_MEM; 506 break; 507 case BPF_LDX|BPF_MEM: 508 if (ftest->k >= BPF_MEMWORDS) 509 return -EINVAL; 510 ftest->code = BPF_S_LDX_MEM; 511 break; 512 case BPF_ST: 513 if (ftest->k >= BPF_MEMWORDS) 514 return -EINVAL; 515 ftest->code = BPF_S_ST; 516 break; 517 case BPF_STX: 518 if (ftest->k >= BPF_MEMWORDS) 519 return -EINVAL; 520 ftest->code = BPF_S_STX; 521 break; 522 523 case BPF_JMP|BPF_JA: 524 /* 525 * Note, the large ftest->k might cause loops. 526 * Compare this with conditional jumps below, 527 * where offsets are limited. --ANK (981016) 528 */ 529 if (ftest->k >= (unsigned)(flen-pc-1)) 530 return -EINVAL; 531 ftest->code = BPF_S_JMP_JA; 532 break; 533 534 case BPF_JMP|BPF_JEQ|BPF_K: 535 ftest->code = BPF_S_JMP_JEQ_K; 536 break; 537 case BPF_JMP|BPF_JEQ|BPF_X: 538 ftest->code = BPF_S_JMP_JEQ_X; 539 break; 540 case BPF_JMP|BPF_JGE|BPF_K: 541 ftest->code = BPF_S_JMP_JGE_K; 542 break; 543 case BPF_JMP|BPF_JGE|BPF_X: 544 ftest->code = BPF_S_JMP_JGE_X; 545 break; 546 case BPF_JMP|BPF_JGT|BPF_K: 547 ftest->code = BPF_S_JMP_JGT_K; 548 break; 549 case BPF_JMP|BPF_JGT|BPF_X: 550 ftest->code = BPF_S_JMP_JGT_X; 551 break; 552 case BPF_JMP|BPF_JSET|BPF_K: 553 ftest->code = BPF_S_JMP_JSET_K; 554 break; 555 case BPF_JMP|BPF_JSET|BPF_X: 556 ftest->code = BPF_S_JMP_JSET_X; 557 break; 558 559 default: 560 return -EINVAL; 561 } 562 563 /* for conditionals both must be safe */ 564 switch (ftest->code) { 565 case BPF_S_JMP_JEQ_K: 566 case BPF_S_JMP_JEQ_X: 567 case BPF_S_JMP_JGE_K: 568 case BPF_S_JMP_JGE_X: 569 case BPF_S_JMP_JGT_K: 570 case BPF_S_JMP_JGT_X: 571 case BPF_S_JMP_JSET_X: 572 case BPF_S_JMP_JSET_K: 573 if (pc + ftest->jt + 1 >= flen || 574 pc + ftest->jf + 1 >= flen) 575 return -EINVAL; 576 } 577 } 578 579 /* last instruction must be a RET code */ 580 switch (filter[flen - 1].code) { 581 case BPF_S_RET_K: 582 case BPF_S_RET_A: 583 return 0; 584 break; 585 default: 586 return -EINVAL; 587 } 588} 589EXPORT_SYMBOL(sk_chk_filter); 590 591/** 592 * sk_filter_release_rcu - Release a socket filter by rcu_head 593 * @rcu: rcu_head that contains the sk_filter to free 594 */ 595void sk_filter_release_rcu(struct rcu_head *rcu) 596{ 597 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); 598 599 kfree(fp); 600} 601EXPORT_SYMBOL(sk_filter_release_rcu); 602 603/** 604 * sk_attach_filter - attach a socket filter 605 * @fprog: the filter program 606 * @sk: the socket to use 607 * 608 * Attach the user's filter code. We first run some sanity checks on 609 * it to make sure it does not explode on us later. If an error 610 * occurs or there is insufficient memory for the filter a negative 611 * errno code is returned. On success the return is zero. 612 */ 613int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) 614{ 615 struct sk_filter *fp, *old_fp; 616 unsigned int fsize = sizeof(struct sock_filter) * fprog->len; 617 int err; 618 619 /* Make sure new filter is there and in the right amounts. */ 620 if (fprog->filter == NULL) 621 return -EINVAL; 622 623 fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); 624 if (!fp) 625 return -ENOMEM; 626 if (copy_from_user(fp->insns, fprog->filter, fsize)) { 627 sock_kfree_s(sk, fp, fsize+sizeof(*fp)); 628 return -EFAULT; 629 } 630 631 atomic_set(&fp->refcnt, 1); 632 fp->len = fprog->len; 633 634 err = sk_chk_filter(fp->insns, fp->len); 635 if (err) { 636 sk_filter_uncharge(sk, fp); 637 return err; 638 } 639 640 rcu_read_lock_bh(); 641 old_fp = rcu_dereference_bh(sk->sk_filter); 642 rcu_assign_pointer(sk->sk_filter, fp); 643 rcu_read_unlock_bh(); 644 645 if (old_fp) 646 sk_filter_uncharge(sk, old_fp); 647 return 0; 648} 649EXPORT_SYMBOL_GPL(sk_attach_filter); 650 651int sk_detach_filter(struct sock *sk) 652{ 653 int ret = -ENOENT; 654 struct sk_filter *filter; 655 656 rcu_read_lock_bh(); 657 filter = rcu_dereference_bh(sk->sk_filter); 658 if (filter) { 659 rcu_assign_pointer(sk->sk_filter, NULL); 660 sk_filter_uncharge(sk, filter); 661 ret = 0; 662 } 663 rcu_read_unlock_bh(); 664 return ret; 665} 666EXPORT_SYMBOL_GPL(sk_detach_filter); 667