/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
#include "opt_inet.h"
#include "opt_route.h"

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/refcount.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/epoch.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_fib.h>

#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <net/route/nhgrp_var.h>

/*
 * This file contains the supporting functions for creating multipath groups
 *  and compiling their dataplane parts.
 */

/* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */
_Static_assert(MPF_MULTIPATH == NHF_MULTIPATH,
    "MPF_MULTIPATH must be the same as NHF_MULTIPATH");
/* Offset and size of the flags field have to be the same for nhops/nhop groups */
CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags);
/* Cap multipath to 64, as larger values would break rib_cmd_info bitmasks */
CTASSERT(RIB_MAX_MPATH_WIDTH <= 64);

static int wn_cmp(const void *a, const void *b);
static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops);

static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl,
    struct weightened_nhop *wn, int num_nhops, int *perror);
static void destroy_nhgrp(struct nhgrp_priv *nhg_priv);
static void destroy_nhgrp_epoch(epoch_context_t ctx);
static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv);

static int
wn_cmp(const void *a, const void *b)
{
	const struct weightened_nhop *wa = a;
	const struct weightened_nhop *wb = b;

	if (wa->weight > wb->weight)
		return (1);
	else if (wa->weight < wb->weight)
		return (-1);

	/* Compare nexthops by pointer */
	if (wa->nh > wb->nh)
		return (1);
	else if (wa->nh < wb->nh)
		return (-1);
	else
		return (0);
}

/*
 * Perform in-place sorting of the array of nexthops in @wn.
 *
 * To avoid nexthop group duplication, the nexthops/weights in
 *   @wn need to be ordered deterministically.
 * As this sorting is needed only for the control plane functionality,
 *  there are no specific external requirements.
 *
 * Sort by weight first, to ease calculation of the slot sizes.
 */
static void
sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops)
{

	qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp);
}

/*
 * Calculate minimum number of slots required to fit the existing
 * set of weights in the common use case where weights are "easily"
 * comparable.
 * Assumes @wn is sorted by weight ascending and each weight is > 0.
 * Returns number of slots or 0 if precise calculation failed.
 *
 * Some examples:
 * note: (i, X) pair means (nhop=i, weight=X):
 * (1, 1) (2, 2) -> 3 slots [1, 2, 2]
 * (1, 100), (2, 200) -> 3 slots [1, 2, 2]
 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3, 3]
 */
static uint32_t
calc_min_mpath_slots_fast(const struct weightened_nhop *wn, size_t num_items)
{
	uint32_t i, last, xmin;
	uint64_t total = 0;

	last = 0;
	xmin = wn[0].weight;
	for (i = 0; i < num_items; i++) {
		total += wn[i].weight;
		if ((wn[i].weight - last < xmin) && (wn[i].weight != last))
			xmin = wn[i].weight - last;
		last = wn[i].weight;
	}
	/* xmin is the minimum unit of desired capacity */
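	/*
	 * When every weight (and hence the total) is a multiple of xmin,
	 * xmin is the GCD of the weights and total/xmin is the smallest
	 * slot count that preserves the exact weight ratios, e.g. weights
	 * 100/200/400 yield xmin = 100 and 7 slots.
	 */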
	if ((total % xmin) != 0)
		return (0);
	for (i = 0; i < num_items; i++) {
		if ((wn[i].weight % xmin) != 0)
			return (0);
	}

	return ((uint32_t)(total / xmin));
}

/*
 * Calculate minimum number of slots required to fit the existing
 * set of weights while maintaining weight coefficients.
 *
 * Assume @wn is sorted by weight ascending and each weight is > 0.
 *
 * Tries to find simple precise solution first and falls back to
 *  RIB_MAX_MPATH_WIDTH in case of any failure.
 */
static uint32_t
calc_min_mpath_slots(const struct weightened_nhop *wn, size_t num_items)
{
	uint32_t v;

	v = calc_min_mpath_slots_fast(wn, num_items);
	if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH))
		v = RIB_MAX_MPATH_WIDTH;

	return (v);
}

/*
 * Nexthop group data consists of
 * 1) dataplane part, with nhgrp_object as a header followed by an
 *   arbitrary number of nexthop pointers.
 * 2) control plane part, with nhgrp_priv as a header, followed by
 *   an arbitrary number of 'struct weightened_nhop' objects.
 *
 * Given nexthop groups are (mostly) immutable, allocate all data
 * in one go.
 *
 */
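/*
 * Resulting layout of the single allocation (NHGRP_PRIV() relies on
 * nhg_size to locate the control plane part):
 *
 *   [nhgrp_object][nhg_size nexthop pointers][nhgrp_priv][num_nhops weights]
 */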
__noinline static size_t
get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops)
{
	size_t sz;

	sz = sizeof(struct nhgrp_object);
	sz += nhg_size * sizeof(struct nhop_object *);
	sz += sizeof(struct nhgrp_priv);
	sz += num_nhops * sizeof(struct weightened_nhop);
	return (sz);
}

/*
 * Compile actual list of nexthops to be used by datapath from
 *  the nexthop group @dst.
 *
 * For example, compiling control plane list of 2 nexthops
 *  [(200, A), (100, B)] would result in the datapath array
 *  [A, A, B]
 */
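/*
 * Slots are distributed against the remaining weight sum and the remaining
 * slot count, so integer rounding on the earlier (lighter) nexthops is
 * absorbed by the later ones and the last nexthop always receives exactly
 * the slots that are left.
 */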
static void
compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x,
    uint32_t num_slots)
{
	struct nhgrp_object *dst;
	int i, slot_idx, remaining_slots;
	uint64_t remaining_sum, nh_weight, nh_slots;

	slot_idx = 0;
	dst = dst_priv->nhg;
	/* Calculate sum of all weights */
	remaining_sum = 0;
	for (i = 0; i < dst_priv->nhg_nh_count; i++)
		remaining_sum += x[i].weight;
	remaining_slots = num_slots;
	DPRINTF("O: %u/%u", (uint32_t)remaining_sum, remaining_slots);
	for (i = 0; i < dst_priv->nhg_nh_count; i++) {
		/* Calculate number of slots for the current nexthop */
		if (remaining_sum > 0) {
			nh_weight = (uint64_t)x[i].weight;
			nh_slots = (nh_weight * remaining_slots / remaining_sum);
		} else
			nh_slots = 0;

		remaining_sum -= x[i].weight;
		remaining_slots -= nh_slots;

		DPRINTF(" OO[%d]: %u/%u curr=%d slot_idx=%d", i,
		    (uint32_t)remaining_sum, remaining_slots,
		    (int)nh_slots, slot_idx);

		KASSERT((slot_idx + nh_slots <= num_slots),
		    ("index overflow during nhg compilation"));
		while (nh_slots-- > 0)
			dst->nhops[slot_idx++] = x[i].nh;
	}
}

/*
 * Allocates a new nexthop group for the list of weightened nexthops.
 * Assumes a sorted list.
 * Does NOT reference any nexthops in the group.
 * Returns group with refcount=1 or NULL.
 */
static struct nhgrp_priv *
alloc_nhgrp(struct weightened_nhop *wn, int num_nhops)
{
	uint32_t nhgrp_size;
	int flags = M_NOWAIT;
	struct nhgrp_object *nhg;
	struct nhgrp_priv *nhg_priv;

	nhgrp_size = calc_min_mpath_slots(wn, num_nhops);
	if (nhgrp_size == 0) {
		/* Zero weights, abort */
		return (NULL);
	}

	size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops);
	nhg = malloc(sz, M_NHOP, flags | M_ZERO);
	if (nhg == NULL) {
		return (NULL);
	}

	/* Has to be the first to make NHGRP_PRIV() work */
	nhg->nhg_size = nhgrp_size;
	DPRINTF("new mpath group: num_nhops: %u", (uint32_t)nhgrp_size);
	nhg->nhg_flags = MPF_MULTIPATH;

	nhg_priv = NHGRP_PRIV(nhg);
	nhg_priv->nhg_nh_count = num_nhops;
	refcount_init(&nhg_priv->nhg_refcount, 1);

	/* Please see nhgrp_free() comments on the initial value */
	refcount_init(&nhg_priv->nhg_linked, 2);

	nhg_priv->nhg = nhg;
	memcpy(&nhg_priv->nhg_nh_weights[0], wn,
	    num_nhops * sizeof(struct weightened_nhop));

	compile_nhgrp(nhg_priv, wn, nhg->nhg_size);

	return (nhg_priv);
}

void
nhgrp_ref_object(struct nhgrp_object *nhg)
{
	struct nhgrp_priv *nhg_priv;
	u_int old;

	nhg_priv = NHGRP_PRIV(nhg);
	old = refcount_acquire(&nhg_priv->nhg_refcount);
	KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg));
}

void
nhgrp_free(struct nhgrp_object *nhg)
{
	struct nhgrp_priv *nhg_priv;
	struct nh_control *ctl;
	struct epoch_tracker et;

	nhg_priv = NHGRP_PRIV(nhg);

	if (!refcount_release(&nhg_priv->nhg_refcount))
		return;

	/*
	 * Group objects don't have an explicit lock attached to them.
	 * As groups are reclaimed based on reference count, it is possible
	 * that some groups will persist after the vnet destruction callback
	 * is called. Given that, handle the scenario with nhgrp_free() being
	 * called either after or simultaneously with nhgrp_ctl_unlink_all()
	 * by using another reference counter: nhg_linked.
	 *
	 * There are only 2 places where nhg_linked can be decreased:
	 *  rib destroy (nhgrp_ctl_unlink_all) and this function.
	 * nhg_linked can never be increased.
	 *
	 * Hence, use initial value of 2 to make use of
	 *  refcount_release_if_not_last().
	 *
	 * There can be two scenarios when calling this function:
	 *
	 * 1) nhg_linked value is 2. This means that either
	 *  nhgrp_ctl_unlink_all() has not been called OR it is running,
	 *  but we are guaranteed that nh_control won't be freed in
	 *  this epoch. Hence, the group can be safely unlinked.
	 *
	 * 2) nhg_linked value is 1. In that case, nhgrp_ctl_unlink_all()
	 *  has been called and the nhgrp unlink can be skipped.
	 */

	NET_EPOCH_ENTER(et);
	if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) {
		ctl = nhg_priv->nh_control;
		if (unlink_nhgrp(ctl, nhg_priv) == NULL) {
			/* Do not try to reclaim */
			DPRINTF("Failed to unlink nexthop group %p", nhg_priv);
			NET_EPOCH_EXIT(et);
			return;
		}
	}
	NET_EPOCH_EXIT(et);

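	/* Defer the actual destruction until all readers leave the net epoch. */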
	epoch_call(net_epoch_preempt, destroy_nhgrp_epoch,
	    &nhg_priv->nhg_epoch_ctx);
}

/*
 * Destroys all local resources belonging to @nhg_priv.
 */
__noinline static void
destroy_nhgrp_int(struct nhgrp_priv *nhg_priv)
{

	free(nhg_priv->nhg, M_NHOP);
}

__noinline static void
destroy_nhgrp(struct nhgrp_priv *nhg_priv)
{

	KASSERT((nhg_priv->nhg_refcount == 0), ("nhg_refcount != 0"));

	DPRINTF("DEL MPATH %p", nhg_priv);

	KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0"));

	free_nhgrp_nhops(nhg_priv);

	destroy_nhgrp_int(nhg_priv);
}

/*
 * Epoch callback indicating group is safe to destroy
 */
static void
destroy_nhgrp_epoch(epoch_context_t ctx)
{
	struct nhgrp_priv *nhg_priv;

	nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx);

	destroy_nhgrp(nhg_priv);
}

static bool
ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
{

	for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
		if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0)
			continue;

		/*
		 * Failed to ref the nexthop because it has been deleted.
		 * Need to roll back the references taken so far.
		 */
		for (int j = 0; j < i; j++)
			nhop_free(nhg_priv->nhg_nh_weights[j].nh);
		return (false);
	}

	return (true);
}

static void
free_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
{

	for (int i = 0; i < nhg_priv->nhg_nh_count; i++)
		nhop_free(nhg_priv->nhg_nh_weights[i].nh);
}

/*
 * Creates or looks up an existing nexthop group based on @wn and @num_nhops.
 *
 * Returns referenced nhop group or NULL, passing error code in @perror.
 */
struct nhgrp_priv *
get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops,
    int *perror)
{
	struct nhgrp_priv *key, *nhg_priv;

	if (num_nhops > RIB_MAX_MPATH_WIDTH) {
		*perror = E2BIG;
		return (NULL);
	}

	if (ctl->gr_head.hash_size == 0) {
		/* First multipath request. Bootstrap the mpath data structures. */
		if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) {
			*perror = ENOMEM;
			return (NULL);
		}
	}

	/* Sort nexthops & check there are no duplicates */
	sort_weightened_nhops(wn, num_nhops);
	uint32_t last_id = 0;
	for (int i = 0; i < num_nhops; i++) {
		if (wn[i].nh->nh_priv->nh_idx == last_id) {
			*perror = EEXIST;
			return (NULL);
		}
		last_id = wn[i].nh->nh_priv->nh_idx;
	}

	if ((key = alloc_nhgrp(wn, num_nhops)) == NULL) {
		*perror = ENOMEM;
		return (NULL);
	}

	nhg_priv = find_nhgrp(ctl, key);
	if (nhg_priv != NULL) {
		/*
		 * Free the originally-created group. As it hasn't been linked
		 *  and the dependent nexthops haven't been referenced, just free
		 *  the group.
		 */
		destroy_nhgrp_int(key);
		*perror = 0;
		return (nhg_priv);
	} else {
		/* No existing group, try to link the new one */
		if (!ref_nhgrp_nhops(key)) {
			/*
			 * Some of the nexthops have been scheduled for deletion.
			 * As the group hasn't been linked / no nexthops have been
			 *  referenced, call the final destructor immediately.
			 */
			destroy_nhgrp_int(key);
			*perror = EAGAIN;
			return (NULL);
		}
		if (link_nhgrp(ctl, key) == 0) {
			/* Unable to allocate index? */
			*perror = EAGAIN;
			free_nhgrp_nhops(key);
			destroy_nhgrp_int(key);
			return (NULL);
		}
		*perror = 0;
		return (key);
	}

	/* NOTREACHED */
}

/*
 * Appends one or more nexthops denoted by @wn to the nexthop group @gr_orig.
 *
 * Returns referenced nexthop group or NULL. In the latter case, @perror is
 *  filled with an error code.
 * Note that the function does NOT check whether the new nexthops already
 * exist in @gr_orig. If they do, they will be added again, resulting in the
 * same nexthop being present multiple times in the new group.
 */
static struct nhgrp_priv *
append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig,
    struct weightened_nhop *wn, int num_nhops, int *perror)
{
	char storage[64];
	struct weightened_nhop *pnhops;
	struct nhgrp_priv *nhg_priv;
	const struct nhgrp_priv *src_priv;
	size_t sz;
	int curr_nhops;

	src_priv = NHGRP_PRIV_CONST(gr_orig);
	curr_nhops = src_priv->nhg_nh_count;

	*perror = 0;

	sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop));
	/* optimize for <= 4 paths, each path=16 bytes */
	if (sz <= sizeof(storage))
		pnhops = (struct weightened_nhop *)&storage[0];
	else {
		pnhops = malloc(sz, M_TEMP, M_NOWAIT);
		if (pnhops == NULL) {
			*perror = ENOMEM;
			return (NULL);
		}
	}

	/* Copy nhops from the original group first */
	memcpy(pnhops, src_priv->nhg_nh_weights,
	    curr_nhops * sizeof(struct weightened_nhop));
	memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop));
	curr_nhops += num_nhops;

	nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, perror);

	if (pnhops != (struct weightened_nhop *)&storage[0])
		free(pnhops, M_TEMP);

	if (nhg_priv == NULL)
		return (NULL);

	return (nhg_priv);
}


/*
 * Creates/finds nexthop group based on @wn and @num_nhops.
 * Returns 0 on success with referenced group in @rnd, or
 * errno.
 *
 * If the error is EAGAIN, then the operation can be retried.
 */
int
nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops,
    struct route_nhop_data *rnd)
{
	struct nh_control *ctl = rh->nh_control;
	struct nhgrp_priv *nhg_priv;
	int error;

	nhg_priv = get_nhgrp(ctl, wn, num_nhops, &error);
	if (nhg_priv != NULL)
		rnd->rnd_nhgrp = nhg_priv->nhg;
	rnd->rnd_weight = 0;

	return (error);
}

/*
 * Creates a new nexthop group based on @src group without the nexthops
 * chosen by @flt_func.
 * Returns 0 on success, storing the referenced nhop group/object in @rnd.
 */
int
nhgrp_get_filtered_group(struct rib_head *rh, const struct nhgrp_object *src,
    nhgrp_filter_cb_t flt_func, void *flt_data, struct route_nhop_data *rnd)
{
	char storage[64];
	struct nh_control *ctl = rh->nh_control;
	struct weightened_nhop *pnhops;
	const struct nhgrp_priv *mp_priv, *src_priv;
	size_t sz;
	int error, i, num_nhops;

	src_priv = NHGRP_PRIV_CONST(src);

	sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop));
	/* optimize for <= 4 paths, each path=16 bytes */
	if (sz <= sizeof(storage))
		pnhops = (struct weightened_nhop *)&storage[0];
	else {
		if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL)
			return (ENOMEM);
	}

	/* Filter nexthops */
	error = 0;
	num_nhops = 0;
	for (i = 0; i < src_priv->nhg_nh_count; i++) {
		if (flt_func(src_priv->nhg_nh_weights[i].nh, flt_data))
			continue;
		memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i],
		    sizeof(struct weightened_nhop));
	}

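	/*
	 * Store the result: no remaining nexthops yields an empty result,
	 * a single remaining nexthop is returned as a plain (referenced)
	 * nhop, and two or more are compiled into a new group.
	 */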
	if (num_nhops == 0) {
		rnd->rnd_nhgrp = NULL;
		rnd->rnd_weight = 0;
	} else if (num_nhops == 1) {
		rnd->rnd_nhop = pnhops[0].nh;
		rnd->rnd_weight = pnhops[0].weight;
		if (nhop_try_ref_object(rnd->rnd_nhop) == 0)
			error = EAGAIN;
	} else {
		mp_priv = get_nhgrp(ctl, pnhops, num_nhops, &error);
		if (mp_priv != NULL)
			rnd->rnd_nhgrp = mp_priv->nhg;
		rnd->rnd_weight = 0;
	}

	if (pnhops != (struct weightened_nhop *)&storage[0])
		free(pnhops, M_TEMP);

	return (error);
}

/*
 * Creates a new multipath group based on the existing group/nhop in @rnd_orig
 *  and the to-be-added nhop in @rnd_add.
 * Returns 0 on success and stores the result in @rnd_new.
 */
int
nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new)
{
	struct nh_control *ctl = rh->nh_control;
	struct nhgrp_priv *nhg_priv;
	struct weightened_nhop wn[2] = {};
	int error;

	if (rnd_orig->rnd_nhop == NULL) {
		/* No paths to add to, just reference current nhop */
		*rnd_new = *rnd_add;
		if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0)
			return (EAGAIN);
		return (0);
	}

	wn[0].nh = rnd_add->rnd_nhop;
	wn[0].weight = rnd_add->rnd_weight;

	if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) {
		/* Simple merge of 2 non-multipath nexthops */
		wn[1].nh = rnd_orig->rnd_nhop;
		wn[1].weight = rnd_orig->rnd_weight;
		nhg_priv = get_nhgrp(ctl, wn, 2, &error);
	} else {
		/* Get a new nhop group with rnd_add->rnd_nhop as an additional nhop */
		nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1,
		    &error);
	}

	if (nhg_priv == NULL)
		return (error);
	rnd_new->rnd_nhgrp = nhg_priv->nhg;
	rnd_new->rnd_weight = 0;

	return (0);
}

/*
 * Returns pointer to the array of nexthops with weights for
 * the given @nhg. Stores the number of items in the array into @pnum_nhops.
 */
struct weightened_nhop *
nhgrp_get_nhops(struct nhgrp_object *nhg, uint32_t *pnum_nhops)
{
	struct nhgrp_priv *nhg_priv;

	KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));

	nhg_priv = NHGRP_PRIV(nhg);
	*pnum_nhops = nhg_priv->nhg_nh_count;

	return (nhg_priv->nhg_nh_weights);
}

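/*
 * Dumps a single group in the following layout:
 *   rt_msghdr | nhgrp_external | nhgrp_container (control plane) |
 *   nhg_nh_count * nhgrp_nhop_external | nhgrp_container (dataplane) |
 *   nhg_size * nhgrp_nhop_external
 */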
__noinline static int
dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv,
    char *buffer, size_t buffer_size, struct sysctl_req *w)
{
	struct rt_msghdr *rtm;
	struct nhgrp_external *nhge;
	struct nhgrp_container *nhgc;
	const struct nhgrp_object *nhg;
	struct nhgrp_nhop_external *ext;
	int error;
	size_t sz;

	nhg = nhg_priv->nhg;

	sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
	/* controlplane nexthops */
	sz += sizeof(struct nhgrp_container);
	sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
	/* dataplane nexthops */
	sz += sizeof(struct nhgrp_container);
	sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;

	KASSERT(sz <= buffer_size, ("increase nhgrp buffer size"));

	bzero(buffer, sz);

	rtm = (struct rt_msghdr *)buffer;
	rtm->rtm_msglen = sz;
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = RTM_GET;

	nhge = (struct nhgrp_external *)(rtm + 1);

	nhge->nhg_idx = nhg_priv->nhg_idx;
	nhge->nhg_refcount = nhg_priv->nhg_refcount;

	/* fill in control plane nexthops first */
	nhgc = (struct nhgrp_container *)(nhge + 1);
	nhgc->nhgc_type = NHG_C_TYPE_CNHOPS;
	nhgc->nhgc_subtype = 0;
	nhgc->nhgc_len = sizeof(struct nhgrp_container);
	nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
	nhgc->nhgc_count = nhg_priv->nhg_nh_count;

	ext = (struct nhgrp_nhop_external *)(nhgc + 1);
	for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
		ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx;
		ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight;
	}

	/* fill in dataplane nexthops */
	nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]);
	nhgc->nhgc_type = NHG_C_TYPE_DNHOPS;
	nhgc->nhgc_subtype = 0;
	nhgc->nhgc_len = sizeof(struct nhgrp_container);
	nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;
	nhgc->nhgc_count = nhg->nhg_size;

	ext = (struct nhgrp_nhop_external *)(nhgc + 1);
	for (int i = 0; i < nhg->nhg_size; i++) {
		ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx;
		ext[i].nh_weight = 0;
	}

	error = SYSCTL_OUT(w, buffer, sz);

	return (error);
}

uint32_t
nhgrp_get_idx(const struct nhgrp_object *nhg)
{
	const struct nhgrp_priv *nhg_priv;

	nhg_priv = NHGRP_PRIV_CONST(nhg);
	return (nhg_priv->nhg_idx);
}

uint32_t
nhgrp_get_count(struct rib_head *rh)
{
	struct nh_control *ctl;
	uint32_t count;

	ctl = rh->nh_control;

	NHOPS_RLOCK(ctl);
	count = ctl->gr_head.items_count;
	NHOPS_RUNLOCK(ctl);

	return (count);
}

int
nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w)
{
	struct nh_control *ctl = rh->nh_control;
	struct epoch_tracker et;
	struct nhgrp_priv *nhg_priv;
	char *buffer;
	size_t sz;
	int error = 0;

	if (ctl->gr_head.items_count == 0)
		return (0);

	/* Calculate the maximum nhop group size in bytes */
	sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
	sz += 2 * sizeof(struct nhgrp_container);
	sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH;
	buffer = malloc(sz, M_TEMP, M_NOWAIT);
	if (buffer == NULL)
		return (ENOMEM);

	NET_EPOCH_ENTER(et);
	NHOPS_RLOCK(ctl);
	CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) {
		error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w);
		if (error != 0)
			break;
	} CHT_SLIST_FOREACH_END;
	NHOPS_RUNLOCK(ctl);
	NET_EPOCH_EXIT(et);

	free(buffer, M_TEMP);

	return (error);
}