1/*	$NetBSD: in_selsrc.c,v 1.17 2016/07/07 09:32:02 ozaki-r Exp $	*/
2
3/*-
4 * Copyright (c) 2005 David Young.  All rights reserved.
5 *
6 * This code was written by David Young.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
21 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.17 2016/07/07 09:32:02 ozaki-r Exp $");
33
34#ifdef _KERNEL_OPT
35#include "opt_inet.h"
36#include "opt_inet_conf.h"
37#endif
38
39#include <lib/libkern/libkern.h>
40
41#include <sys/param.h>
42#include <sys/ioctl.h>
43#include <sys/errno.h>
44#include <sys/malloc.h>
45#include <sys/socket.h>
46#include <sys/socketvar.h>
47#include <sys/sysctl.h>
48#include <sys/systm.h>
49#include <sys/proc.h>
50#include <sys/syslog.h>
51
52#include <net/if.h>
53
54#include <net/if_ether.h>
55
56#include <netinet/in_systm.h>
57#include <netinet/in.h>
58#include <netinet/in_var.h>
59#include <netinet/ip.h>
60#include <netinet/ip_var.h>
61#include <netinet/in_ifattach.h>
62#include <netinet/in_pcb.h>
63#include <netinet/if_inarp.h>
64#include <netinet/ip_mroute.h>
65#include <netinet/igmp_var.h>
66#include <netinet/in_selsrc.h>
67
68#ifdef INET
/*
 * One entry of the name table used to translate between the textual
 * name of a scoring function, as it appears in a policy string, and
 * the scoring function itself.
 */
struct score_src_name {
	const char		*sn_name;	/* name used in policy strings */
	const in_score_src_t	sn_score_src;	/* matching scoring function */
};
73
/* Forward declarations of the file-local helpers defined below. */
static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *);
static int in_preference(const struct in_addr *, int, int,
    const struct in_addr *);
static int in_index(const struct in_addr *, int, int, const struct in_addr *);
static int in_matchlen(const struct in_addr *, int, int,
    const struct in_addr *);
static int in_match_category(const struct in_addr *, int, int,
    const struct in_addr *);
static size_t in_get_selectsrc(const struct in_ifselsrc *, char *,
    const size_t);
static int in_set_selectsrc(struct in_ifselsrc *, char *buf);
static int in_sysctl_selectsrc(SYSCTLFN_PROTO);
static in_score_src_t name_to_score_src(const char *);
static const char *score_src_to_name(const in_score_src_t);
static void in_score(const in_score_src_t *, int *, int *,
    const struct in_addr *, int, int, const struct in_addr *);
90
91static const struct score_src_name score_src_names[] = {
92	  {"same-category", in_match_category}
93	, {"common-prefix-len", in_matchlen}
94	, {"index", in_index}
95	, {"preference", in_preference}
96	, {NULL, NULL}
97};
98
/*
 * Template whose scoring-function list is copied into every policy
 * allocated by in_selsrc_domifattach(): no scoring functions at all.
 */
static const struct in_ifselsrc initial_iss = { 0, {NULL} };

/*
 * Policy that in_getifa() falls back to when an interface has no
 * scoring function of its own.  Initially it scores by in_index only.
 */
static struct in_ifselsrc default_iss = { 0, {in_index} };
102
#ifdef GETIFA_DEBUG
/*
 * When non-zero, the scoring code in this file prints its decisions.
 * Exported through the "debug" sysctl node created in
 * sysctl_selectsrc_setup.
 */
int in_selsrc_debug = 0;
#endif /* GETIFA_DEBUG */
106
107SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup")
108{
109	int rc;
110	const struct sysctlnode *rnode, *cnode;
111
112	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
113	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet",
114	    NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) {
115		printf("%s: could not create net.inet, rc = %d\n", __func__,
116		    rc);
117		return;
118	}
119	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
120	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip",
121	    NULL, NULL, 0, NULL, 0,
122	    CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) {
123		printf("%s: could not create net.inet.ip, rc = %d\n", __func__,
124		    rc);
125		return;
126	}
127	if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
128	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc",
129	    NULL, NULL, 0, NULL, 0,
130	    CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) {
131		printf("%s: could not create net.inet.ip.selectsrc, "
132		       "rc = %d\n", __func__, rc);
133		return;
134	}
135#ifdef GETIFA_DEBUG
136	if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
137	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug",
138	    SYSCTL_DESCR("enable source-selection debug messages"),
139	    NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) {
140		printf("%s: could not create net.inet.ip.selectsrc.debug, "
141		       "rc = %d\n", __func__, rc);
142		return;
143	}
144#endif /* GETIFA_DEBUG */
145	if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
146	    CTLFLAG_READWRITE, CTLTYPE_STRUCT, "default",
147	    SYSCTL_DESCR("default source selection policy"),
148	    in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN,
149	    CTL_CREATE, CTL_EOL)) != 0) {
150		printf(
151		    "%s: could not create net.inet.ip.selectsrc.default (%d)\n",
152		    __func__, rc);
153		return;
154	}
155}
156
/*
 * Scoring function "preference": the score is the address's preference
 * number, so addresses with a higher preference win.  Preference
 * numbers are assigned with ioctl SIOCSIFADDRPREF.
 *
 * src, idx and dst are part of the common scoring-function signature
 * and are not used here.
 */
static int
in_preference(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	(void)src;
	(void)idx;
	(void)dst;

	return preference;
}
167
/*
 * Scoring function "index": the score is the negated position of the
 * address in the ifaddr list, so addresses nearer the head of the list
 * win.
 *
 * src, preference and dst are part of the common scoring-function
 * signature and are not used here.
 */
static int
in_index(const struct in_addr *src, int preference, int idx,
    const struct in_addr *dst)
{
	(void)src;
	(void)preference;
	(void)dst;

	return -idx;
}
178
/*
 * Scoring function "common-prefix-len": length, in bits (0..32), of the
 * longest common prefix of src and dst.  The addresses are compared
 * byte by byte in memory order.
 *
 * preference and idx are part of the common scoring-function signature
 * and are not used here.
 *
 * (Derived from in6_matchlen.)
 */
static int
in_matchlen(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	const uint8_t *s = (const uint8_t *)src;
	const uint8_t *d = (const uint8_t *)dst;
	int match = 0;
	int i;

	(void)preference;
	(void)idx;

	for (i = 0; i < 4; i++) {
		uint_fast8_t r = s[i] ^ d[i];

		if (r == 0) {
			/* Whole octet agrees. */
			match += 8;
			continue;
		}
		/*
		 * First differing octet: count the leading bits that
		 * still agree.  This has to happen for every octet,
		 * the last one included.
		 */
		while ((r & 0x80) == 0) {
			match++;
			r <<= 1;
		}
		break;
	}
	return match;
}
205
206static enum in_category
207in_categorize(const struct in_addr *s)
208{
209	if (IN_ANY_LOCAL(s->s_addr))
210		return IN_CATEGORY_LINKLOCAL;
211	else if (IN_PRIVATE(s->s_addr))
212		return IN_CATEGORY_PRIVATE;
213	else
214		return IN_CATEGORY_OTHER;
215}
216
217static int
218in_match_category(const struct in_addr *src, int preference,
219    int idx, const struct in_addr *dst)
220{
221	enum in_category dst_c = in_categorize(dst),
222	                 src_c = in_categorize(src);
223#ifdef GETIFA_DEBUG
224	if (in_selsrc_debug) {
225		printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32
226		    " categ %d\n", __func__, ntohl(dst->s_addr), dst_c,
227		    ntohl(src->s_addr), src_c);
228	}
229#endif /* GETIFA_DEBUG */
230
231	if (dst_c == src_c)
232		return 2;
233	else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE)
234		return 1;
235	else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL)
236		return 1;
237	else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE)
238		return 1;
239	else
240		return 0;
241}
242
243static void
244in_score(const in_score_src_t *score_src, int *score, int *scorelenp,
245    const struct in_addr *src, int preference, int idx,
246    const struct in_addr *dst)
247{
248	int i;
249
250	for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++)
251		score[i] = (*score_src[i])(src, preference, idx, dst);
252	if (scorelenp != NULL)
253		*scorelenp = i;
254}
255
/*
 * Compare two score vectors of scorelen entries lexicographically:
 * the first position at which they differ decides.
 *
 * Returns a positive value if score1 ranks above score2, a negative
 * value if it ranks below, and 0 if the vectors are equal (or scorelen
 * is 0).  The entries are compared rather than subtracted so that
 * widely separated scores cannot overflow and flip the sign.
 */
static int
in_score_cmp(int *score1, int *score2, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++) {
		if (score1[i] != score2[i])
			return (score1[i] > score2[i]) ? 1 : -1;
	}
	return 0;
}
268
#ifdef GETIFA_DEBUG
/*
 * Print the first scorelen entries of score as a bracketed,
 * comma-separated list followed by a newline.  The opening bracket is
 * emitted together with the first entry, so an empty vector prints
 * only "]".
 */
static void
in_score_println(int *score, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++)
		printf("%s%d", (i == 0) ? "[" : ", ", score[i]);
	printf("]\n");
}
#endif /* GETIFA_DEBUG */
283
284/* Scan the interface addresses on the interface ifa->ifa_ifp for
285 * the source address that best matches the destination, dst0,
286 * according to the source address-selection policy for this
287 * interface.  If there is no better match than `ifa', return `ifa'.
288 * Otherwise, return the best address.
289 *
290 * Note that in_getifa is called after the kernel has decided which
291 * output interface to use (ifa->ifa_ifp), and in_getifa will not
292 * scan an address belonging to any other interface.
293 */
294struct ifaddr *
295in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0)
296{
297	const in_score_src_t *score_src;
298	int idx, scorelen;
299	const struct sockaddr_in *dst, *src;
300	struct ifaddr *alt_ifa, *best_ifa;
301	struct ifnet *ifp;
302	struct in_ifsysctl *isc;
303	struct in_ifselsrc *iss;
304	int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX];
305	struct in_ifaddr *ia;
306
307	if (ifa->ifa_addr->sa_family != AF_INET ||
308	    dst0 == NULL || dst0->sa_family != AF_INET) {	/* Possible. */
309		ifa->ifa_seqno = NULL;
310		return ifa;
311	}
312
313	ifp = ifa->ifa_ifp;
314	KASSERT(ifp->if_afdata[AF_INET] != NULL);
315	isc = ((struct in_ifinfo *)(ifp)->if_afdata[AF_INET])->ii_selsrc;
316	if (isc != NULL && isc->isc_selsrc != NULL &&
317	    isc->isc_selsrc->iss_score_src[0] != NULL)
318		iss = isc->isc_selsrc;
319	else
320		iss = &default_iss;
321	score_src = &iss->iss_score_src[0];
322
323	dst = (const struct sockaddr_in *)dst0;
324
325	best_ifa = ifa;
326
327	/* Find out the index of this ifaddr. */
328	idx = 0;
329	IFADDR_READER_FOREACH(alt_ifa, ifa->ifa_ifp) {
330		if (alt_ifa == best_ifa)
331			break;
332		idx++;
333	}
334	in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr,
335	    best_ifa->ifa_preference, idx, &dst->sin_addr);
336
337#ifdef GETIFA_DEBUG
338	if (in_selsrc_debug) {
339		printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ",
340		    __func__, ntohl(dst->sin_addr.s_addr),
341		    ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr));
342		in_score_println(best_score, scorelen);
343	}
344#endif /* GETIFA_DEBUG */
345
346	idx = -1;
347	IFADDR_READER_FOREACH(alt_ifa, ifa->ifa_ifp) {
348		++idx;
349		src = IA_SIN(alt_ifa);
350
351		if (alt_ifa == ifa || src->sin_family != AF_INET)
352			continue;
353		ia = (struct in_ifaddr *)alt_ifa;
354		if (ia->ia4_flags & IN_IFF_NOTREADY)
355			continue;
356
357		in_score(score_src, score, NULL, &src->sin_addr,
358		         alt_ifa->ifa_preference, idx, &dst->sin_addr);
359
360#ifdef GETIFA_DEBUG
361		if (in_selsrc_debug) {
362			printf("%s: src %#" PRIx32 " score ", __func__,
363			    ntohl(src->sin_addr.s_addr));
364			in_score_println(score, scorelen);
365		}
366#endif /* GETIFA_DEBUG */
367
368		if (in_score_cmp(score, best_score, scorelen) > 0) {
369			(void)memcpy(best_score, score, sizeof(best_score));
370			best_ifa = alt_ifa;
371		}
372	}
373
374	ia = (struct in_ifaddr *)best_ifa;
375	if (ia->ia4_flags & IN_IFF_NOTREADY)
376		return NULL;
377
378#ifdef GETIFA_DEBUG
379	if (in_selsrc_debug) {
380		printf("%s: choose src %#" PRIx32 " score ", __func__,
381		    ntohl(IA_SIN(best_ifa)->sin_addr.s_addr));
382		in_score_println(best_score, scorelen);
383	}
384#endif /* GETIFA_DEBUG */
385
386	best_ifa->ifa_seqno = &iss->iss_seqno;
387	return best_ifa;
388}
389
390static in_score_src_t
391name_to_score_src(const char *name)
392{
393	int i;
394
395	for (i = 0; score_src_names[i].sn_name != NULL; i++) {
396		if (strcmp(score_src_names[i].sn_name, name) == 0)
397			return score_src_names[i].sn_score_src;
398	}
399	return NULL;
400}
401
402static const char *
403score_src_to_name(const in_score_src_t score_src)
404{
405	int i;
406	for (i = 0; score_src_names[i].sn_name != NULL; i++) {
407		if (score_src == score_src_names[i].sn_score_src)
408			return score_src_names[i].sn_name;
409	}
410	return "<unknown>";
411}
412
413static size_t
414in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0,
415    const size_t buflen0)
416{
417	int i, rc;
418	char *buf = buf0;
419	const char *delim;
420	size_t buflen = buflen0;
421
422	KASSERT(buflen >= 1);
423
424	for (delim = "", i = 0;
425	     i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL;
426	     delim = ",", i++) {
427		rc = snprintf(buf, buflen, "%s%s",
428		    delim, score_src_to_name(iss->iss_score_src[i]));
429		if (rc == -1)
430			return buflen0 - buflen;
431		if (rc >= buflen)
432			return buflen0 + rc - buflen;
433		buf += rc;
434		buflen -= rc;
435	}
436	if (buf == buf0)
437		*buf++ = '\0';
438	return buf - buf0;
439}
440
441static int
442in_set_selectsrc(struct in_ifselsrc *iss, char *buf)
443{
444	int i, s;
445	char *next = buf;
446	const char *name;
447	in_score_src_t score_src;
448	in_score_src_t scorers[IN_SCORE_SRC_MAX];
449
450	memset(&scorers, 0, sizeof(scorers));
451	for (i = 0;
452	     (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX;
453	     i++) {
454		if (strcmp(name, "") == 0)
455			break;
456		if ((score_src = name_to_score_src(name)) == NULL)
457			return EINVAL;
458		scorers[i] = score_src;
459	}
460	if (i == IN_SCORE_SRC_MAX && name != NULL)
461		return EFBIG;
462	s = splnet();
463	(void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src));
464        /* If iss affects a specific interface that used to use
465         * the default policy, increase the sequence number on the
466         * default policy, forcing routes that cache a source
467         * (rt_ifa) found by the default policy to refresh their
468         * cache.
469	 */
470	if (iss != &default_iss && iss->iss_score_src[0] == NULL &&
471	    scorers[0] != NULL)
472		default_iss.iss_seqno++;
473	iss->iss_seqno++;
474	splx(s);
475	return 0;
476}
477
478/*
479 * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc.
480 * Pulls the old value out as a human-readable string, interprets
481 * and records the new value.
482 */
483static int
484in_sysctl_selectsrc(SYSCTLFN_ARGS)
485{
486	char policy[IN_SELECTSRC_LEN];
487	int error;
488	struct sysctlnode node;
489	struct in_ifselsrc *iss;
490
491	node = *rnode;
492	iss = (struct in_ifselsrc *)node.sysctl_data;
493	if (oldp != NULL &&
494	    (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy))
495		return error;
496	node.sysctl_data = &policy[0];
497	error = sysctl_lookup(SYSCTLFN_CALL(&node));
498	if (error || newp == NULL)
499		return (error);
500
501	return in_set_selectsrc(iss, policy);
502}
503
504static const struct sysctlnode *
505in_domifattach_sysctl(struct in_ifsysctl *isc)
506{
507	int rc;
508	const struct sysctlnode *rnode;
509
510	if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode,
511	                         CTLFLAG_READONLY, CTLTYPE_NODE,
512				 "interfaces", NULL,
513				 NULL, 0, NULL, 0,
514				 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE,
515				 CTL_EOL)) != 0) {
516		printf("%s: could not create net.inet.ip.interfaces, rc = %d\n",
517		    __func__, rc);
518		return NULL;
519	}
520	if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
521	                         CTLFLAG_READONLY, CTLTYPE_NODE,
522				 isc->isc_ifp->if_xname,
523				 SYSCTL_DESCR("interface ip options"),
524				 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) {
525		printf("%s: could not create net.inet.ip.interfaces.%s, "
526		       "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc);
527		goto err;
528	}
529	if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
530	                         CTLFLAG_READWRITE, CTLTYPE_STRING,
531				 "selectsrc",
532				 SYSCTL_DESCR("source selection policy"),
533				 in_sysctl_selectsrc, 0,
534				 (void *)isc->isc_selsrc, IN_SELECTSRC_LEN,
535				 CTL_CREATE, CTL_EOL)) != 0) {
536		printf(
537		    "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n",
538		    __func__, isc->isc_ifp->if_xname, rc);
539		goto err;
540	}
541	return rnode;
542err:
543	sysctl_teardown(&isc->isc_log);
544	return NULL;
545}
546
547void *
548in_selsrc_domifattach(struct ifnet *ifp)
549{
550	struct in_ifsysctl *isc;
551	struct in_ifselsrc *iss;
552
553	isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR,
554	    M_WAITOK | M_ZERO);
555
556	iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR,
557	    M_WAITOK | M_ZERO);
558
559	memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0],
560	    MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src)));
561
562	isc->isc_ifp = ifp;
563	isc->isc_selsrc = iss;
564
565	if (in_domifattach_sysctl(isc) == NULL)
566		goto err;
567
568	return isc;
569err:
570	free(iss, M_IFADDR);
571	free(isc, M_IFADDR);
572	return NULL;
573}
574
575void
576in_selsrc_domifdetach(struct ifnet *ifp, void *aux)
577{
578	struct in_ifsysctl *isc;
579	struct in_ifselsrc *iss;
580
581	if (aux == NULL)
582		return;
583	isc = (struct in_ifsysctl *)aux;
584	iss = isc->isc_selsrc;
585	sysctl_teardown(&isc->isc_log);
586	free(isc, M_IFADDR);
587	free(iss, M_IFADDR);
588}
589#endif /* INET */
590