1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2021-2022 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29#include "opt_inet.h"
30#include "opt_inet6.h"
31#include "opt_route.h"
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/malloc.h>
36#include <sys/socket.h>
37#include <sys/jail.h>
38#include <sys/kernel.h>
39#include <sys/lock.h>
40#include <sys/rmlock.h>
41
42#include <net/if.h>
43#include <net/if_var.h>
44#include <net/vnet.h>
45#include <net/route.h>
46#include <net/route/route_ctl.h>
47#include <net/route/route_var.h>
48#include <net/route/nhop.h>
49#include <netinet/in.h>
50#include <netinet6/scope6_var.h>
51
52#include <vm/uma.h>
53
/*
 * Routing table UMA zone: rtentry objects are allocated from and
 * returned to this zone. Declared via VNET_DEFINE_STATIC and accessed
 * through the V_rtzone wrapper below.
 */
VNET_DEFINE_STATIC(uma_zone_t, rtzone);
#define	V_rtzone	VNET(rtzone)
57
58void
59vnet_rtzone_init(void)
60{
61
62	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
63		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
64}
65
66#ifdef VIMAGE
67void
68vnet_rtzone_destroy(void)
69{
70
71	uma_zdestroy(V_rtzone);
72}
73#endif
74
/*
 * Allocates an rtentry and initializes it from @dst/@netmask data.
 * Returns the new rtentry on success, or NULL if the allocation fails.
 * Note: rtentry mask ptr will be set to @netmask, thus its pointer is required
 *  to be stable till the end of the operation (radix rt insertion/change/removal).
 */
81struct rtentry *
82rt_alloc(struct rib_head *rnh, const struct sockaddr *dst,
83    struct sockaddr *netmask)
84{
85	MPASS(dst->sa_len <= sizeof(((struct rtentry *)NULL)->rt_dstb));
86
87	struct rtentry *rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
88	if (rt == NULL)
89		return (NULL);
90	rt->rte_flags = RTF_UP | (netmask == NULL ? RTF_HOST : 0);
91
92	/* Fill in dst, ensuring it's masked if needed. */
93	if (netmask != NULL) {
94		rt_maskedcopy(dst, &rt->rt_dst, netmask);
95	} else
96		bcopy(dst, &rt->rt_dst, dst->sa_len);
97	rt_key(rt) = &rt->rt_dst;
98	/* Set netmask to the storage from info. It will be updated upon insertion */
99	rt_mask(rt) = netmask;
100
101	return (rt);
102}
103
104static void
105destroy_rtentry(struct rtentry *rt)
106{
107#ifdef VIMAGE
108	struct nhop_object *nh = rt->rt_nhop;
109
110	/*
111	 * At this moment rnh, nh_control may be already freed.
112	 * nhop interface may have been migrated to a different vnet.
113	 * Use vnet stored in the nexthop to delete the entry.
114	 */
115#ifdef ROUTE_MPATH
116	if (NH_IS_NHGRP(nh)) {
117		const struct weightened_nhop *wn;
118		uint32_t num_nhops;
119		wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
120		nh = wn[0].nh;
121	}
122#endif
123	CURVNET_SET(nhop_get_vnet(nh));
124#endif
125
126	/* Unreference nexthop */
127	nhop_free_any(rt->rt_nhop);
128
129	rt_free_immediate(rt);
130
131	CURVNET_RESTORE();
132}
133
134/*
135 * Epoch callback indicating rtentry is safe to destroy
136 */
137static void
138destroy_rtentry_epoch(epoch_context_t ctx)
139{
140	struct rtentry *rt;
141
142	rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);
143
144	destroy_rtentry(rt);
145}
146
147/*
148 * Schedule rtentry deletion
149 */
150void
151rt_free(struct rtentry *rt)
152{
153
154	KASSERT(rt != NULL, ("%s: NULL rt", __func__));
155
156	NET_EPOCH_CALL(destroy_rtentry_epoch, &rt->rt_epoch_ctx);
157}
158
159void
160rt_free_immediate(struct rtentry *rt)
161{
162	uma_zfree(V_rtzone, rt);
163}
164
165bool
166rt_is_host(const struct rtentry *rt)
167{
168
169	return (rt->rte_flags & RTF_HOST);
170}
171
172sa_family_t
173rt_get_family(const struct rtentry *rt)
174{
175	const struct sockaddr *dst;
176
177	dst = (const struct sockaddr *)rt_key_const(rt);
178
179	return (dst->sa_family);
180}
181
182/*
183 * Returns pointer to nexthop or nexthop group
184 * associated with @rt
185 */
186struct nhop_object *
187rt_get_raw_nhop(const struct rtentry *rt)
188{
189
190	return (rt->rt_nhop);
191}
192
193void
194rt_get_rnd(const struct rtentry *rt, struct route_nhop_data *rnd)
195{
196	rnd->rnd_nhop = rt->rt_nhop;
197	rnd->rnd_weight = rt->rt_weight;
198}
199
/*
 * If the process is in a jail w/o VNET, export only host routes for the
 *  addresses assigned to the jail.
 * Otherwise, allow exporting the entire table.
 */
bool
rt_is_exportable(const struct rtentry *rt, struct ucred *cred)
{

	if (rt_is_host(rt)) {
		/* Host route: exportable only when prison_if() returns 0. */
		return (prison_if(cred, rt_key_const(rt)) == 0);
	}

	/*
	 * Performance optimisation: only host routes are allowed
	 * in the jail w/o vnet.
	 */
	return (!jailed_without_vnet(cred));
}
222
223#ifdef INET
224/*
225 * Stores IPv4 address and prefix length of @rt inside
226 *  @paddr and @plen.
227 * @pscopeid is currently always set to 0.
228 */
229void
230rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr,
231    int *plen, uint32_t *pscopeid)
232{
233	const struct sockaddr_in *dst;
234
235	dst = (const struct sockaddr_in *)rt_key_const(rt);
236	KASSERT((dst->sin_family == AF_INET),
237	    ("rt family is %d, not inet", dst->sin_family));
238	*paddr = dst->sin_addr;
239	dst = (const struct sockaddr_in *)rt_mask_const(rt);
240	if (dst == NULL)
241		*plen = 32;
242	else
243		*plen = bitcount32(dst->sin_addr.s_addr);
244	*pscopeid = 0;
245}
246
/*
 * Stores IPv4 address and prefix mask of @rt inside
 *  @paddr and @pmask. Sets mask to INADDR_BROADCAST for host routes.
 * @pscopeid is currently always set to 0.
 */
252void
253rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr,
254    struct in_addr *pmask, uint32_t *pscopeid)
255{
256	const struct sockaddr_in *dst;
257
258	dst = (const struct sockaddr_in *)rt_key_const(rt);
259	KASSERT((dst->sin_family == AF_INET),
260	    ("rt family is %d, not inet", dst->sin_family));
261	*paddr = dst->sin_addr;
262	dst = (const struct sockaddr_in *)rt_mask_const(rt);
263	if (dst == NULL)
264		pmask->s_addr = INADDR_BROADCAST;
265	else
266		*pmask = dst->sin_addr;
267	*pscopeid = 0;
268}
269#endif
270
271#ifdef INET6
272static int
273inet6_get_plen(const struct in6_addr *addr)
274{
275
276	return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
277	    bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
278}
279
280/*
281 * Stores IPv6 address and prefix length of @rt inside
282 *  @paddr and @plen. Addresses are returned in de-embedded form.
283 * Scopeid is set to 0 for non-LL addresses.
284 */
285void
286rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr,
287    int *plen, uint32_t *pscopeid)
288{
289	const struct sockaddr_in6 *dst;
290
291	dst = (const struct sockaddr_in6 *)rt_key_const(rt);
292	KASSERT((dst->sin6_family == AF_INET6),
293	    ("rt family is %d, not inet6", dst->sin6_family));
294	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
295		in6_splitscope(&dst->sin6_addr, paddr, pscopeid);
296	else
297		*paddr = dst->sin6_addr;
298	dst = (const struct sockaddr_in6 *)rt_mask_const(rt);
299	if (dst == NULL)
300		*plen = 128;
301	else
302		*plen = inet6_get_plen(&dst->sin6_addr);
303}
304
305/*
306 * Stores IPv6 address and prefix mask of @rt inside
307 *  @paddr and @pmask. Addresses are returned in de-embedded form.
308 * Scopeid is set to 0 for non-LL addresses.
309 */
310void
311rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr,
312    struct in6_addr *pmask, uint32_t *pscopeid)
313{
314	const struct sockaddr_in6 *dst;
315
316	dst = (const struct sockaddr_in6 *)rt_key_const(rt);
317	KASSERT((dst->sin6_family == AF_INET6),
318	    ("rt family is %d, not inet", dst->sin6_family));
319	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
320		in6_splitscope(&dst->sin6_addr, paddr, pscopeid);
321	else
322		*paddr = dst->sin6_addr;
323	dst = (const struct sockaddr_in6 *)rt_mask_const(rt);
324	if (dst == NULL)
325		memset(pmask, 0xFF, sizeof(struct in6_addr));
326	else
327		*pmask = dst->sin6_addr;
328}
329#endif
330
331
332