1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1980, 1986, 1991, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31/************************************************************************
 * Note: In this file a 'fib' is a "forwarding information base",	*
 * which is the new name for an in-kernel routing (next hop) table.	*
34 ***********************************************************************/
35
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD$");
38#include "opt_route.h"
39
40#include <sys/param.h>
41#include <sys/socket.h>
42#include <sys/systm.h>
43#include <sys/malloc.h>
44#include <sys/jail.h>
45#include <sys/proc.h>
46#include <sys/sysctl.h>
47#include <sys/syslog.h>
48#include <sys/kernel.h>
49#include <sys/lock.h>
50#include <sys/sx.h>
51#include <sys/domain.h>
52#include <sys/sysproto.h>
53
54#include <net/vnet.h>
55#include <net/route.h>
56#include <net/route/route_var.h>
57
/*
 * Compile-time default for the number of routing tables.
 * The ROUTETABLES kernel config option, when present, must lie in
 * the range 1..RT_MAXFIBS and becomes the default.
 */
#if defined(ROUTETABLES)
#if ROUTETABLES <= 0
#error "ROUTETABLES defined too low"
#elif ROUTETABLES > RT_MAXFIBS
#error "ROUTETABLES defined too big"
#endif
#define	RT_NUMFIBS	ROUTETABLES
#endif /* ROUTETABLES */

/* Fall back to a single routing table if nothing else set the default. */
#if !defined(RT_NUMFIBS)
#define	RT_NUMFIBS	1
#endif
72
73static void grow_rtables(uint32_t num_fibs);
74
75VNET_DEFINE_STATIC(struct sx, rtables_lock);
76#define	V_rtables_lock		VNET(rtables_lock)
77#define	RTABLES_LOCK()		sx_xlock(&V_rtables_lock)
78#define	RTABLES_UNLOCK()	sx_xunlock(&V_rtables_lock)
79#define	RTABLES_LOCK_INIT()	sx_init(&V_rtables_lock, "rtables lock")
80#define	RTABLES_LOCK_ASSERT()	sx_assert(&V_rtables_lock, SA_LOCKED)
81
82VNET_DEFINE_STATIC(struct rib_head **, rt_tables);
83#define	V_rt_tables	VNET(rt_tables)
84
85VNET_DEFINE(uint32_t, _rt_numfibs) = RT_NUMFIBS;
86
87/*
88 * Handler for net.my_fibnum.
89 * Returns current fib of the process.
90 */
91static int
92sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
93{
94        int fibnum;
95        int error;
96
97        fibnum = curthread->td_proc->p_fibnum;
98        error = sysctl_handle_int(oidp, &fibnum, 0, req);
99        return (error);
100}
101SYSCTL_PROC(_net, OID_AUTO, my_fibnum,
102    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
103    &sysctl_my_fibnum, "I",
104    "default FIB of caller");
105
106static uint32_t
107normalize_num_rtables(uint32_t num_rtables)
108{
109
110	if (num_rtables > RT_MAXFIBS)
111		num_rtables = RT_MAXFIBS;
112	else if (num_rtables == 0)
113		num_rtables = 1;
114	return (num_rtables);
115}
116
117/*
118 * Sets the number of fibs in the current vnet.
119 * Function does not allow shrinking number of rtables.
120 */
121static int
122sysctl_fibs(SYSCTL_HANDLER_ARGS)
123{
124	uint32_t new_fibs;
125	int error;
126
127	RTABLES_LOCK();
128	new_fibs = V_rt_numfibs;
129	error = sysctl_handle_32(oidp, &new_fibs, 0, req);
130	if (error == 0) {
131		new_fibs = normalize_num_rtables(new_fibs);
132
133		if (new_fibs < V_rt_numfibs)
134			error = ENOTCAPABLE;
135		if (new_fibs > V_rt_numfibs)
136			grow_rtables(new_fibs);
137	}
138	RTABLES_UNLOCK();
139
140	return (error);
141}
142SYSCTL_PROC(_net, OID_AUTO, fibs,
143    CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
144    NULL, 0, &sysctl_fibs, "IU",
145    "set number of fibs");
146
147/*
148 * Sets fib of a current process.
149 */
150int
151sys_setfib(struct thread *td, struct setfib_args *uap)
152{
153	int error = 0;
154
155	CURVNET_SET(TD_TO_VNET(td));
156	if (uap->fibnum >= 0 && uap->fibnum < V_rt_numfibs)
157		td->td_proc->p_fibnum = uap->fibnum;
158	else
159		error = EINVAL;
160	CURVNET_RESTORE();
161
162	return (error);
163}
164
165/*
166 * Grows up the number of routing tables in the current fib.
167 * Function creates new index array for all rtables and allocates
168 *  remaining routing tables.
169 */
170static void
171grow_rtables(uint32_t num_tables)
172{
173	struct domain *dom;
174	struct rib_head **prnh, *rh;
175	struct rib_head **new_rt_tables, **old_rt_tables;
176	int family;
177
178	RTABLES_LOCK_ASSERT();
179
180	KASSERT(num_tables >= V_rt_numfibs, ("num_tables(%u) < rt_numfibs(%u)\n",
181				num_tables, V_rt_numfibs));
182
183	new_rt_tables = mallocarray(num_tables * (AF_MAX + 1), sizeof(void *),
184	    M_RTABLE, M_WAITOK | M_ZERO);
185
186	if ((num_tables > 1) && (V_rt_add_addr_allfibs == 0))
187		printf("WARNING: Adding ifaddrs to all fibs has been turned off "
188			"by default. Consider tuning %s if needed\n",
189			"net.add_addr_allfibs");
190
191#ifdef FIB_ALGO
192	fib_grow_rtables(num_tables);
193#endif
194
195	/*
196	 * Current rt_tables layout:
197	 * fib0[af0, af1, af2, .., AF_MAX]fib1[af0, af1, af2, .., Af_MAX]..
198	 * this allows to copy existing tables data by using memcpy()
199	 */
200	if (V_rt_tables != NULL)
201		memcpy(new_rt_tables, V_rt_tables,
202		    V_rt_numfibs * (AF_MAX + 1) * sizeof(void *));
203
204	/* Populate the remainders */
205	for (dom = domains; dom; dom = dom->dom_next) {
206		if (dom->dom_rtattach == NULL)
207			continue;
208		family = dom->dom_family;
209		for (int i = 0; i < num_tables; i++) {
210			prnh = &new_rt_tables[i * (AF_MAX + 1) + family];
211			if (*prnh != NULL)
212				continue;
213			rh = dom->dom_rtattach(i);
214			if (rh == NULL)
215				log(LOG_ERR, "unable to create routing table for %d.%d\n",
216				    dom->dom_family, i);
217			*prnh = rh;
218		}
219	}
220
221	/*
222	 * Update rtables pointer.
223	 * Ensure all writes to new_rt_tables has been completed before
224	 *  switching pointer.
225	 */
226	atomic_thread_fence_rel();
227	old_rt_tables = V_rt_tables;
228	V_rt_tables = new_rt_tables;
229
230	/* Wait till all cpus see new pointers */
231	atomic_thread_fence_rel();
232	epoch_wait_preempt(net_epoch_preempt);
233
234	/* Set number of fibs to a new value */
235	V_rt_numfibs = num_tables;
236
237#ifdef FIB_ALGO
238	/* Attach fib algo to the new rtables */
239	for (dom = domains; dom; dom = dom->dom_next) {
240		if (dom->dom_rtattach != NULL)
241			fib_setup_family(dom->dom_family, num_tables);
242	}
243#endif
244
245	if (old_rt_tables != NULL)
246		free(old_rt_tables, M_RTABLE);
247}
248
249static void
250vnet_rtables_init(const void *unused __unused)
251{
252	int num_rtables_base;
253
254	if (IS_DEFAULT_VNET(curvnet)) {
255		num_rtables_base = RT_NUMFIBS;
256		TUNABLE_INT_FETCH("net.fibs", &num_rtables_base);
257		V_rt_numfibs = normalize_num_rtables(num_rtables_base);
258	} else
259		V_rt_numfibs = 1;
260
261	vnet_rtzone_init();
262#ifdef FIB_ALGO
263	vnet_fib_init();
264#endif
265	RTABLES_LOCK_INIT();
266
267	RTABLES_LOCK();
268	grow_rtables(V_rt_numfibs);
269	RTABLES_UNLOCK();
270}
271VNET_SYSINIT(vnet_rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
272    vnet_rtables_init, 0);
273
274#ifdef VIMAGE
275static void
276rtables_destroy(const void *unused __unused)
277{
278	struct rib_head *rnh;
279	struct domain *dom;
280	int family;
281
282	RTABLES_LOCK();
283	for (dom = domains; dom; dom = dom->dom_next) {
284		if (dom->dom_rtdetach == NULL)
285			continue;
286		family = dom->dom_family;
287		for (int i = 0; i < V_rt_numfibs; i++) {
288			rnh = rt_tables_get_rnh(i, family);
289			dom->dom_rtdetach(rnh);
290		}
291	}
292	RTABLES_UNLOCK();
293
294	/*
295	 * dom_rtdetach calls rt_table_destroy(), which
296	 *  schedules deletion for all rtentries, nexthops and control
297	 *  structures. Wait for the destruction callbacks to fire.
298	 * Note that this should result in freeing all rtentries, but
299	 *  nexthops deletions will be scheduled for the next epoch run
300	 *  and will be completed after vnet teardown.
301	 */
302	epoch_drain_callbacks(net_epoch_preempt);
303
304	free(V_rt_tables, M_RTABLE);
305	vnet_rtzone_destroy();
306#ifdef FIB_ALGO
307	vnet_fib_destroy();
308#endif
309}
310VNET_SYSUNINIT(rtables_destroy, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
311    rtables_destroy, 0);
312#endif
313
314static inline struct rib_head *
315rt_tables_get_rnh_ptr(uint32_t table, sa_family_t family)
316{
317	struct rib_head **prnh;
318
319	KASSERT(table < V_rt_numfibs,
320	    ("%s: table out of bounds (%d < %d)", __func__, table,
321	     V_rt_numfibs));
322	KASSERT(family < (AF_MAX + 1),
323	    ("%s: fam out of bounds (%d < %d)", __func__, family, AF_MAX + 1));
324
325	/* rnh is [fib=0][af=0]. */
326	prnh = V_rt_tables;
327	/* Get the offset to the requested table and fam. */
328	prnh += table * (AF_MAX + 1) + family;
329
330	return (*prnh);
331}
332
333struct rib_head *
334rt_tables_get_rnh(uint32_t table, sa_family_t family)
335{
336
337	return (rt_tables_get_rnh_ptr(table, family));
338}
339
340u_int
341rt_tables_get_gen(uint32_t table, sa_family_t family)
342{
343	struct rib_head *rnh;
344
345	rnh = rt_tables_get_rnh_ptr(table, family);
346	KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d family %d",
347	    __func__, table, family));
348	return (rnh->rnh_gen);
349}
350