1/* $NetBSD: in_selsrc.c,v 1.17 2016/07/07 09:32:02 ozaki-r Exp $ */ 2 3/*- 4 * Copyright (c) 2005 David Young. All rights reserved. 5 * 6 * This code was written by David Young. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY 18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 19 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 20 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31#include <sys/cdefs.h> 32__KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.17 2016/07/07 09:32:02 ozaki-r Exp $"); 33 34#ifdef _KERNEL_OPT 35#include "opt_inet.h" 36#include "opt_inet_conf.h" 37#endif 38 39#include <lib/libkern/libkern.h> 40 41#include <sys/param.h> 42#include <sys/ioctl.h> 43#include <sys/errno.h> 44#include <sys/malloc.h> 45#include <sys/socket.h> 46#include <sys/socketvar.h> 47#include <sys/sysctl.h> 48#include <sys/systm.h> 49#include <sys/proc.h> 50#include <sys/syslog.h> 51 52#include <net/if.h> 53 54#include <net/if_ether.h> 55 56#include <netinet/in_systm.h> 57#include <netinet/in.h> 58#include <netinet/in_var.h> 59#include <netinet/ip.h> 60#include <netinet/ip_var.h> 61#include <netinet/in_ifattach.h> 62#include <netinet/in_pcb.h> 63#include <netinet/if_inarp.h> 64#include <netinet/ip_mroute.h> 65#include <netinet/igmp_var.h> 66#include <netinet/in_selsrc.h> 67 68#ifdef INET 69struct score_src_name { 70 const char *sn_name; 71 const in_score_src_t sn_score_src; 72}; 73 74static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *); 75static int in_preference(const struct in_addr *, int, int, 76 const struct in_addr *); 77static int in_index(const struct in_addr *, int, int, const struct in_addr *); 78static int in_matchlen(const struct in_addr *, int, int, 79 const struct in_addr *); 80static int in_match_category(const struct in_addr *, int, int, 81 const struct in_addr *); 82static size_t in_get_selectsrc(const struct in_ifselsrc *, char *, 83 const size_t); 84static int in_set_selectsrc(struct in_ifselsrc *, char *buf); 85static int in_sysctl_selectsrc(SYSCTLFN_PROTO); 86static in_score_src_t name_to_score_src(const char *); 87static const char *score_src_to_name(const in_score_src_t); 88static void in_score(const in_score_src_t *, int *, int *, 89 const struct in_addr *, int, int, const struct in_addr *); 90 91static const struct score_src_name score_src_names[] = { 92 {"same-category", in_match_category} 93 , {"common-prefix-len", in_matchlen} 94 , {"index", in_index} 95 , {"preference", in_preference} 96 , {NULL, NULL} 97}; 98 99static const struct in_ifselsrc initial_iss = { 0, {NULL} }; 100 101static struct in_ifselsrc default_iss = { 0, {in_index} }; 102 103#ifdef GETIFA_DEBUG 104int in_selsrc_debug = 0; 105#endif /* GETIFA_DEBUG */ 106 107SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup") 108{ 109 int rc; 110 const struct sysctlnode *rnode, *cnode; 111 112 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 113 CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet", 114 NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) { 115 printf("%s: could not create net.inet, rc = %d\n", __func__, 116 rc); 117 return; 118 } 119 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 120 CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip", 121 NULL, NULL, 0, NULL, 0, 122 CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) { 123 printf("%s: could not create net.inet.ip, rc = %d\n", __func__, 124 rc); 125 return; 126 } 127 if ((rc = sysctl_createv(clog, 0, NULL, &rnode, 128 CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc", 129 NULL, NULL, 0, NULL, 0, 130 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) { 131 printf("%s: could not create net.inet.ip.selectsrc, " 132 "rc = %d\n", __func__, rc); 133 return; 134 } 135#ifdef GETIFA_DEBUG 136 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode, 137 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug", 138 SYSCTL_DESCR("enable source-selection debug messages"), 139 NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) { 140 printf("%s: could not create net.inet.ip.selectsrc.debug, " 141 "rc = %d\n", __func__, rc); 142 return; 143 } 144#endif /* GETIFA_DEBUG */ 145 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode, 146 CTLFLAG_READWRITE, CTLTYPE_STRUCT, "default", 147 SYSCTL_DESCR("default source selection policy"), 148 in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN, 149 CTL_CREATE, CTL_EOL)) != 0) { 150 printf( 151 "%s: could not create net.inet.ip.selectsrc.default (%d)\n", 152 __func__, rc); 153 return; 154 } 155} 156 157/* 158 * Score by address preference: prefer addresses with higher preference 159 * number. Preference numbers are assigned with ioctl SIOCSIFADDRPREF. 160 */ 161static int 162in_preference(const struct in_addr *src, int preference, 163 int idx, const struct in_addr *dst) 164{ 165 return preference; 166} 167 168/* 169 * Score by address "index": prefer addresses nearer the head of 170 * the ifaddr list. 171 */ 172static int 173in_index(const struct in_addr *src, int preference, int idx, 174 const struct in_addr *dst) 175{ 176 return -idx; 177} 178 179/* 180 * Length of longest common prefix of src and dst. 181 * 182 * (Derived from in6_matchlen.) 183 */ 184static int 185in_matchlen(const struct in_addr *src, int preference, 186 int idx, const struct in_addr *dst) 187{ 188 int match = 0; 189 const uint8_t *s = (const uint8_t *)src, *d = (const uint8_t *)dst; 190 const uint8_t *lim = s + 4; 191 uint_fast8_t r = 0; 192 193 while (s < lim && (r = (*d++ ^ *s++)) == 0) 194 match += 8; 195 196 if (s == lim) 197 return match; 198 199 while ((r & 0x80) == 0) { 200 match++; 201 r <<= 1; 202 } 203 return match; 204} 205 206static enum in_category 207in_categorize(const struct in_addr *s) 208{ 209 if (IN_ANY_LOCAL(s->s_addr)) 210 return IN_CATEGORY_LINKLOCAL; 211 else if (IN_PRIVATE(s->s_addr)) 212 return IN_CATEGORY_PRIVATE; 213 else 214 return IN_CATEGORY_OTHER; 215} 216 217static int 218in_match_category(const struct in_addr *src, int preference, 219 int idx, const struct in_addr *dst) 220{ 221 enum in_category dst_c = in_categorize(dst), 222 src_c = in_categorize(src); 223#ifdef GETIFA_DEBUG 224 if (in_selsrc_debug) { 225 printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32 226 " categ %d\n", __func__, ntohl(dst->s_addr), dst_c, 227 ntohl(src->s_addr), src_c); 228 } 229#endif /* GETIFA_DEBUG */ 230 231 if (dst_c == src_c) 232 return 2; 233 else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE) 234 return 1; 235 else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL) 236 return 1; 237 else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE) 238 return 1; 239 else 240 return 0; 241} 242 243static void 244in_score(const in_score_src_t *score_src, int *score, int *scorelenp, 245 const struct in_addr *src, int preference, int idx, 246 const struct in_addr *dst) 247{ 248 int i; 249 250 for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++) 251 score[i] = (*score_src[i])(src, preference, idx, dst); 252 if (scorelenp != NULL) 253 *scorelenp = i; 254} 255 256static int 257in_score_cmp(int *score1, int *score2, int scorelen) 258{ 259 int i; 260 261 for (i = 0; i < scorelen; i++) { 262 if (score1[i] == score2[i]) 263 continue; 264 return score1[i] - score2[i]; 265 } 266 return 0; 267} 268 269#ifdef GETIFA_DEBUG 270static void 271in_score_println(int *score, int scorelen) 272{ 273 int i; 274 const char *delim = "["; 275 276 for (i = 0; i < scorelen; i++) { 277 printf("%s%d", delim, score[i]); 278 delim = ", "; 279 } 280 printf("]\n"); 281} 282#endif /* GETIFA_DEBUG */ 283 284/* Scan the interface addresses on the interface ifa->ifa_ifp for 285 * the source address that best matches the destination, dst0, 286 * according to the source address-selection policy for this 287 * interface. If there is no better match than `ifa', return `ifa'. 288 * Otherwise, return the best address. 289 * 290 * Note that in_getifa is called after the kernel has decided which 291 * output interface to use (ifa->ifa_ifp), and in_getifa will not 292 * scan an address belonging to any other interface. 293 */ 294struct ifaddr * 295in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0) 296{ 297 const in_score_src_t *score_src; 298 int idx, scorelen; 299 const struct sockaddr_in *dst, *src; 300 struct ifaddr *alt_ifa, *best_ifa; 301 struct ifnet *ifp; 302 struct in_ifsysctl *isc; 303 struct in_ifselsrc *iss; 304 int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX]; 305 struct in_ifaddr *ia; 306 307 if (ifa->ifa_addr->sa_family != AF_INET || 308 dst0 == NULL || dst0->sa_family != AF_INET) { /* Possible. */ 309 ifa->ifa_seqno = NULL; 310 return ifa; 311 } 312 313 ifp = ifa->ifa_ifp; 314 KASSERT(ifp->if_afdata[AF_INET] != NULL); 315 isc = ((struct in_ifinfo *)(ifp)->if_afdata[AF_INET])->ii_selsrc; 316 if (isc != NULL && isc->isc_selsrc != NULL && 317 isc->isc_selsrc->iss_score_src[0] != NULL) 318 iss = isc->isc_selsrc; 319 else 320 iss = &default_iss; 321 score_src = &iss->iss_score_src[0]; 322 323 dst = (const struct sockaddr_in *)dst0; 324 325 best_ifa = ifa; 326 327 /* Find out the index of this ifaddr. */ 328 idx = 0; 329 IFADDR_READER_FOREACH(alt_ifa, ifa->ifa_ifp) { 330 if (alt_ifa == best_ifa) 331 break; 332 idx++; 333 } 334 in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr, 335 best_ifa->ifa_preference, idx, &dst->sin_addr); 336 337#ifdef GETIFA_DEBUG 338 if (in_selsrc_debug) { 339 printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ", 340 __func__, ntohl(dst->sin_addr.s_addr), 341 ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr)); 342 in_score_println(best_score, scorelen); 343 } 344#endif /* GETIFA_DEBUG */ 345 346 idx = -1; 347 IFADDR_READER_FOREACH(alt_ifa, ifa->ifa_ifp) { 348 ++idx; 349 src = IA_SIN(alt_ifa); 350 351 if (alt_ifa == ifa || src->sin_family != AF_INET) 352 continue; 353 ia = (struct in_ifaddr *)alt_ifa; 354 if (ia->ia4_flags & IN_IFF_NOTREADY) 355 continue; 356 357 in_score(score_src, score, NULL, &src->sin_addr, 358 alt_ifa->ifa_preference, idx, &dst->sin_addr); 359 360#ifdef GETIFA_DEBUG 361 if (in_selsrc_debug) { 362 printf("%s: src %#" PRIx32 " score ", __func__, 363 ntohl(src->sin_addr.s_addr)); 364 in_score_println(score, scorelen); 365 } 366#endif /* GETIFA_DEBUG */ 367 368 if (in_score_cmp(score, best_score, scorelen) > 0) { 369 (void)memcpy(best_score, score, sizeof(best_score)); 370 best_ifa = alt_ifa; 371 } 372 } 373 374 ia = (struct in_ifaddr *)best_ifa; 375 if (ia->ia4_flags & IN_IFF_NOTREADY) 376 return NULL; 377 378#ifdef GETIFA_DEBUG 379 if (in_selsrc_debug) { 380 printf("%s: choose src %#" PRIx32 " score ", __func__, 381 ntohl(IA_SIN(best_ifa)->sin_addr.s_addr)); 382 in_score_println(best_score, scorelen); 383 } 384#endif /* GETIFA_DEBUG */ 385 386 best_ifa->ifa_seqno = &iss->iss_seqno; 387 return best_ifa; 388} 389 390static in_score_src_t 391name_to_score_src(const char *name) 392{ 393 int i; 394 395 for (i = 0; score_src_names[i].sn_name != NULL; i++) { 396 if (strcmp(score_src_names[i].sn_name, name) == 0) 397 return score_src_names[i].sn_score_src; 398 } 399 return NULL; 400} 401 402static const char * 403score_src_to_name(const in_score_src_t score_src) 404{ 405 int i; 406 for (i = 0; score_src_names[i].sn_name != NULL; i++) { 407 if (score_src == score_src_names[i].sn_score_src) 408 return score_src_names[i].sn_name; 409 } 410 return "<unknown>"; 411} 412 413static size_t 414in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0, 415 const size_t buflen0) 416{ 417 int i, rc; 418 char *buf = buf0; 419 const char *delim; 420 size_t buflen = buflen0; 421 422 KASSERT(buflen >= 1); 423 424 for (delim = "", i = 0; 425 i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL; 426 delim = ",", i++) { 427 rc = snprintf(buf, buflen, "%s%s", 428 delim, score_src_to_name(iss->iss_score_src[i])); 429 if (rc == -1) 430 return buflen0 - buflen; 431 if (rc >= buflen) 432 return buflen0 + rc - buflen; 433 buf += rc; 434 buflen -= rc; 435 } 436 if (buf == buf0) 437 *buf++ = '\0'; 438 return buf - buf0; 439} 440 441static int 442in_set_selectsrc(struct in_ifselsrc *iss, char *buf) 443{ 444 int i, s; 445 char *next = buf; 446 const char *name; 447 in_score_src_t score_src; 448 in_score_src_t scorers[IN_SCORE_SRC_MAX]; 449 450 memset(&scorers, 0, sizeof(scorers)); 451 for (i = 0; 452 (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX; 453 i++) { 454 if (strcmp(name, "") == 0) 455 break; 456 if ((score_src = name_to_score_src(name)) == NULL) 457 return EINVAL; 458 scorers[i] = score_src; 459 } 460 if (i == IN_SCORE_SRC_MAX && name != NULL) 461 return EFBIG; 462 s = splnet(); 463 (void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src)); 464 /* If iss affects a specific interface that used to use 465 * the default policy, increase the sequence number on the 466 * default policy, forcing routes that cache a source 467 * (rt_ifa) found by the default policy to refresh their 468 * cache. 469 */ 470 if (iss != &default_iss && iss->iss_score_src[0] == NULL && 471 scorers[0] != NULL) 472 default_iss.iss_seqno++; 473 iss->iss_seqno++; 474 splx(s); 475 return 0; 476} 477 478/* 479 * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc. 480 * Pulls the old value out as a human-readable string, interprets 481 * and records the new value. 482 */ 483static int 484in_sysctl_selectsrc(SYSCTLFN_ARGS) 485{ 486 char policy[IN_SELECTSRC_LEN]; 487 int error; 488 struct sysctlnode node; 489 struct in_ifselsrc *iss; 490 491 node = *rnode; 492 iss = (struct in_ifselsrc *)node.sysctl_data; 493 if (oldp != NULL && 494 (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy)) 495 return error; 496 node.sysctl_data = &policy[0]; 497 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 498 if (error || newp == NULL) 499 return (error); 500 501 return in_set_selectsrc(iss, policy); 502} 503 504static const struct sysctlnode * 505in_domifattach_sysctl(struct in_ifsysctl *isc) 506{ 507 int rc; 508 const struct sysctlnode *rnode; 509 510 if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode, 511 CTLFLAG_READONLY, CTLTYPE_NODE, 512 "interfaces", NULL, 513 NULL, 0, NULL, 0, 514 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, 515 CTL_EOL)) != 0) { 516 printf("%s: could not create net.inet.ip.interfaces, rc = %d\n", 517 __func__, rc); 518 return NULL; 519 } 520 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode, 521 CTLFLAG_READONLY, CTLTYPE_NODE, 522 isc->isc_ifp->if_xname, 523 SYSCTL_DESCR("interface ip options"), 524 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) { 525 printf("%s: could not create net.inet.ip.interfaces.%s, " 526 "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc); 527 goto err; 528 } 529 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode, 530 CTLFLAG_READWRITE, CTLTYPE_STRING, 531 "selectsrc", 532 SYSCTL_DESCR("source selection policy"), 533 in_sysctl_selectsrc, 0, 534 (void *)isc->isc_selsrc, IN_SELECTSRC_LEN, 535 CTL_CREATE, CTL_EOL)) != 0) { 536 printf( 537 "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n", 538 __func__, isc->isc_ifp->if_xname, rc); 539 goto err; 540 } 541 return rnode; 542err: 543 sysctl_teardown(&isc->isc_log); 544 return NULL; 545} 546 547void * 548in_selsrc_domifattach(struct ifnet *ifp) 549{ 550 struct in_ifsysctl *isc; 551 struct in_ifselsrc *iss; 552 553 isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR, 554 M_WAITOK | M_ZERO); 555 556 iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR, 557 M_WAITOK | M_ZERO); 558 559 memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0], 560 MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src))); 561 562 isc->isc_ifp = ifp; 563 isc->isc_selsrc = iss; 564 565 if (in_domifattach_sysctl(isc) == NULL) 566 goto err; 567 568 return isc; 569err: 570 free(iss, M_IFADDR); 571 free(isc, M_IFADDR); 572 return NULL; 573} 574 575void 576in_selsrc_domifdetach(struct ifnet *ifp, void *aux) 577{ 578 struct in_ifsysctl *isc; 579 struct in_ifselsrc *iss; 580 581 if (aux == NULL) 582 return; 583 isc = (struct in_ifsysctl *)aux; 584 iss = isc->isc_selsrc; 585 sysctl_teardown(&isc->isc_log); 586 free(isc, M_IFADDR); 587 free(iss, M_IFADDR); 588} 589#endif /* INET */ 590