177574Skris
2139823Simp/*-
3176042Ssilby * Copyright (c) 2008 Michael J. Silbersack.
477574Skris * All rights reserved.
577574Skris *
677574Skris * Redistribution and use in source and binary forms, with or without
777574Skris * modification, are permitted provided that the following conditions
877574Skris * are met:
977574Skris * 1. Redistributions of source code must retain the above copyright
10176042Ssilby *    notice unmodified, this list of conditions, and the following
11176042Ssilby *    disclaimer.
1277574Skris * 2. Redistributions in binary form must reproduce the above copyright
1377574Skris *    notice, this list of conditions and the following disclaimer in the
1477574Skris *    documentation and/or other materials provided with the distribution.
1577574Skris *
1677574Skris * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1777574Skris * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1877574Skris * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
1977574Skris * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2077574Skris * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2177574Skris * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2277574Skris * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2377574Skris * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2477574Skris * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2577574Skris * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2677574Skris */
2777574Skris
28176042Ssilby#include <sys/cdefs.h>
29176042Ssilby__FBSDID("$FreeBSD: releng/11.0/sys/netinet/ip_id.c 302372 2016-07-06 14:09:49Z nwhitehorn $");
30176042Ssilby
31176042Ssilby/*
32176042Ssilby * IP ID generation is a fascinating topic.
3377574Skris *
34176042Ssilby * In order to avoid ID collisions during packet reassembly, common sense
35176042Ssilby * dictates that the period between reuse of IDs be as large as possible.
36176042Ssilby * This leads to the classic implementation of a system-wide counter, thereby
37176042Ssilby * ensuring that IDs repeat only once every 2^16 packets.
3877574Skris *
39176042Ssilby * Subsequent security researchers have pointed out that using a global
40176042Ssilby * counter makes ID values predictable.  This predictability allows traffic
41176042Ssilby * analysis, idle scanning, and even packet injection in specific cases.
42176042Ssilby * These results suggest that IP IDs should be as random as possible.
4377574Skris *
44176042Ssilby * The "searchable queues" algorithm used in this IP ID implementation was
45176042Ssilby * proposed by Amit Klein.  It is a compromise between the above two
46176042Ssilby * viewpoints that has provable behavior that can be tuned to the user's
47176042Ssilby * requirements.
48176042Ssilby *
49176042Ssilby * The basic concept is that we supplement a standard random number generator
50176042Ssilby * with a queue of the last L IDs that we have handed out to ensure that all
51176042Ssilby * IDs have a period of at least L.
52176042Ssilby *
53176042Ssilby * To efficiently implement this idea, we keep two data structures: a
54176042Ssilby * circular array of IDs of size L and a bitstring of 65536 bits.
55176042Ssilby *
56176042Ssilby * To start, we ask the RNG for a new ID.  A quick index into the bitstring
57176042Ssilby * is used to determine if this is a recently used value.  The process is
58176042Ssilby * repeated until a value is returned that is not in the bitstring.
59176042Ssilby *
60176042Ssilby * Having found a usable ID, we remove the ID stored at the current position
61176042Ssilby * in the queue from the bitstring and replace it with our new ID.  Our new
62176042Ssilby * ID is then added to the bitstring and the queue pointer is incremented.
63176042Ssilby *
64176042Ssilby * The lower limit of 512 was chosen because there doesn't seem to be much
65176042Ssilby * point to having a smaller value.  The upper limit of 32768 was chosen for
66176042Ssilby * two reasons.  First, every step above 32768 decreases the entropy.  Taken
67176042Ssilby * to an extreme, 65533 would offer 1 bit of entropy.  Second, the number of
68176042Ssilby * attempts it takes the algorithm to find an unused ID drastically
69176042Ssilby * increases, killing performance.  The default value of 8192 was chosen
70176042Ssilby * because it provides a good tradeoff between randomness and non-repetition.
71176042Ssilby *
72176042Ssilby * With L=8192, the queue will use 16K of memory.  The bitstring always
73176042Ssilby * uses 8K of memory.  No memory is allocated until the use of random ids is
74176042Ssilby * enabled.
7577574Skris */
7677574Skris
77280971Sglebius#include <sys/param.h>
78280971Sglebius#include <sys/systm.h>
79280971Sglebius#include <sys/counter.h>
80280972Sbz#include <sys/kernel.h>
81176042Ssilby#include <sys/malloc.h>
82176042Ssilby#include <sys/lock.h>
83176042Ssilby#include <sys/mutex.h>
8477574Skris#include <sys/random.h>
85280971Sglebius#include <sys/smp.h>
86176042Ssilby#include <sys/sysctl.h>
87280788Sglebius#include <sys/bitstring.h>
88280788Sglebius
89280788Sglebius#include <net/vnet.h>
90280788Sglebius
91176042Ssilby#include <netinet/in.h>
92280971Sglebius#include <netinet/ip.h>
93176042Ssilby#include <netinet/ip_var.h>
9477574Skris
95280971Sglebius/*
96280971Sglebius * By default we generate IP ID only for non-atomic datagrams, as
97280971Sglebius * suggested by RFC6864.  We use per-CPU counter for that, or if
98280971Sglebius * user wants to, we can turn on random ID generation.
99280971Sglebius */
100280971Sglebiusstatic VNET_DEFINE(int, ip_rfc6864) = 1;
101280971Sglebiusstatic VNET_DEFINE(int, ip_do_randomid) = 0;
102280971Sglebius#define	V_ip_rfc6864		VNET(ip_rfc6864)
103280971Sglebius#define	V_ip_do_randomid	VNET(ip_do_randomid)
104280971Sglebius
105280971Sglebius/*
106280971Sglebius * Random ID state engine.
107280971Sglebius */
108176042Ssilbystatic MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state");
109280788Sglebiusstatic VNET_DEFINE(uint16_t *, id_array);
110280788Sglebiusstatic VNET_DEFINE(bitstr_t *, id_bits);
111280788Sglebiusstatic VNET_DEFINE(int, array_ptr);
112280788Sglebiusstatic VNET_DEFINE(int, array_size);
113280788Sglebiusstatic VNET_DEFINE(int, random_id_collisions);
114280788Sglebiusstatic VNET_DEFINE(int, random_id_total);
115280788Sglebiusstatic VNET_DEFINE(struct mtx, ip_id_mtx);
116280788Sglebius#define	V_id_array	VNET(id_array)
117280788Sglebius#define	V_id_bits	VNET(id_bits)
118280788Sglebius#define	V_array_ptr	VNET(array_ptr)
119280788Sglebius#define	V_array_size	VNET(array_size)
120280788Sglebius#define	V_random_id_collisions	VNET(random_id_collisions)
121280788Sglebius#define	V_random_id_total	VNET(random_id_total)
122280788Sglebius#define	V_ip_id_mtx	VNET(ip_id_mtx)
12377574Skris
124280971Sglebius/*
125280971Sglebius * Non-random ID state engine is simply a per-cpu counter.
126280971Sglebius */
127280971Sglebiusstatic VNET_DEFINE(counter_u64_t, ip_id);
128280971Sglebius#define	V_ip_id		VNET(ip_id)
129280971Sglebius
130280971Sglebiusstatic int	sysctl_ip_randomid(SYSCTL_HANDLER_ARGS);
131280971Sglebiusstatic int	sysctl_ip_id_change(SYSCTL_HANDLER_ARGS);
132280787Sglebiusstatic void	ip_initid(int);
133280971Sglebiusstatic uint16_t ip_randomid(void);
134280788Sglebiusstatic void	ipid_sysinit(void);
135280788Sglebiusstatic void	ipid_sysuninit(void);
13677574Skris
137185571SbzSYSCTL_DECL(_net_inet_ip);
138280971SglebiusSYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id,
139280971Sglebius    CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW,
140280971Sglebius    &VNET_NAME(ip_do_randomid), 0, sysctl_ip_randomid, "IU",
141280971Sglebius    "Assign random ip_id values");
142280971SglebiusSYSCTL_INT(_net_inet_ip, OID_AUTO, rfc6864, CTLFLAG_VNET | CTLFLAG_RW,
143280971Sglebius    &VNET_NAME(ip_rfc6864), 0,
144280971Sglebius    "Use constant IP ID for atomic datagrams");
145280788SglebiusSYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id_period,
146280788Sglebius    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET,
147280788Sglebius    &VNET_NAME(array_size), 0, sysctl_ip_id_change, "IU", "IP ID Array size");
148280788SglebiusSYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_collisions,
149280788Sglebius    CTLFLAG_RD | CTLFLAG_VNET,
150280788Sglebius    &VNET_NAME(random_id_collisions), 0, "Count of IP ID collisions");
151280788SglebiusSYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD | CTLFLAG_VNET,
152280788Sglebius    &VNET_NAME(random_id_total), 0, "Count of IP IDs created");
15377574Skris
154176042Ssilbystatic int
155280971Sglebiussysctl_ip_randomid(SYSCTL_HANDLER_ARGS)
156280971Sglebius{
157280971Sglebius	int error, new;
158280971Sglebius
159280971Sglebius	new = V_ip_do_randomid;
160280971Sglebius	error = sysctl_handle_int(oidp, &new, 0, req);
161280971Sglebius	if (error || req->newptr == NULL)
162280971Sglebius		return (error);
163280971Sglebius	if (new != 0 && new != 1)
164280971Sglebius		return (EINVAL);
165280971Sglebius	if (new == V_ip_do_randomid)
166280971Sglebius		return (0);
167280971Sglebius	if (new == 1 && V_ip_do_randomid == 0)
168280971Sglebius		ip_initid(8192);
169280971Sglebius	/* We don't free memory when turning random ID off, due to race. */
170280971Sglebius	V_ip_do_randomid = new;
171280971Sglebius	return (0);
172280971Sglebius}
173280971Sglebius
174280971Sglebiusstatic int
175176042Ssilbysysctl_ip_id_change(SYSCTL_HANDLER_ARGS)
17677574Skris{
177176042Ssilby	int error, new;
17877574Skris
179280788Sglebius	new = V_array_size;
180176042Ssilby	error = sysctl_handle_int(oidp, &new, 0, req);
181176042Ssilby	if (error == 0 && req->newptr) {
182280787Sglebius		if (new >= 512 && new <= 32768)
183280787Sglebius			ip_initid(new);
184280787Sglebius		else
185176042Ssilby			error = EINVAL;
18677574Skris	}
187176042Ssilby	return (error);
18877574Skris}
18977574Skris
190133874Srwatsonstatic void
191280787Sglebiusip_initid(int new_size)
19277574Skris{
193280787Sglebius	uint16_t *new_array;
194280787Sglebius	bitstr_t *new_bits;
19577574Skris
196280787Sglebius	new_array = malloc(new_size * sizeof(uint16_t), M_IPID,
197280787Sglebius	    M_WAITOK | M_ZERO);
198280787Sglebius	new_bits = malloc(bitstr_size(65536), M_IPID, M_WAITOK | M_ZERO);
19977574Skris
200280788Sglebius	mtx_lock(&V_ip_id_mtx);
201280788Sglebius	if (V_id_array != NULL) {
202280788Sglebius		free(V_id_array, M_IPID);
203280788Sglebius		free(V_id_bits, M_IPID);
20477574Skris	}
205280788Sglebius	V_id_array = new_array;
206280788Sglebius	V_id_bits = new_bits;
207280788Sglebius	V_array_size = new_size;
208280788Sglebius	V_array_ptr = 0;
209280788Sglebius	V_random_id_collisions = 0;
210280788Sglebius	V_random_id_total = 0;
211280788Sglebius	mtx_unlock(&V_ip_id_mtx);
21277574Skris}
21377574Skris
214280971Sglebiusstatic uint16_t
21577574Skrisip_randomid(void)
21677574Skris{
217280787Sglebius	uint16_t new_id;
21877574Skris
219280788Sglebius	mtx_lock(&V_ip_id_mtx);
220176042Ssilby	/*
221176042Ssilby	 * To avoid a conflict with the zeros that the array is initially
222176042Ssilby	 * filled with, we never hand out an id of zero.
223176042Ssilby	 */
224176042Ssilby	new_id = 0;
225176042Ssilby	do {
226176042Ssilby		if (new_id != 0)
227280788Sglebius			V_random_id_collisions++;
228176042Ssilby		arc4rand(&new_id, sizeof(new_id), 0);
229280788Sglebius	} while (bit_test(V_id_bits, new_id) || new_id == 0);
230280788Sglebius	bit_clear(V_id_bits, V_id_array[V_array_ptr]);
231280788Sglebius	bit_set(V_id_bits, new_id);
232280788Sglebius	V_id_array[V_array_ptr] = new_id;
233280788Sglebius	V_array_ptr++;
234280788Sglebius	if (V_array_ptr == V_array_size)
235280788Sglebius		V_array_ptr = 0;
236280788Sglebius	V_random_id_total++;
237280788Sglebius	mtx_unlock(&V_ip_id_mtx);
238176042Ssilby	return (new_id);
23977574Skris}
240280787Sglebius
241280971Sglebiusvoid
242280971Sglebiusip_fillid(struct ip *ip)
243280971Sglebius{
244280971Sglebius
245280971Sglebius	/*
246280971Sglebius	 * Per RFC6864 Section 4
247280971Sglebius	 *
248280971Sglebius	 * o  Atomic datagrams: (DF==1) && (MF==0) && (frag_offset==0)
249280971Sglebius	 * o  Non-atomic datagrams: (DF==0) || (MF==1) || (frag_offset>0)
250280971Sglebius	 */
251280971Sglebius	if (V_ip_rfc6864 && (ip->ip_off & htons(IP_DF)) == htons(IP_DF))
252280971Sglebius		ip->ip_id = 0;
253280971Sglebius	else if (V_ip_do_randomid)
254280971Sglebius		ip->ip_id = ip_randomid();
255280971Sglebius	else {
256280971Sglebius		counter_u64_add(V_ip_id, 1);
257280989Sglebius		/*
258280989Sglebius		 * There are two issues about this trick, to be kept in mind.
259280989Sglebius		 * 1) We can migrate between counter_u64_add() and next
260280989Sglebius		 *    line, and grab counter from other CPU, resulting in too
261280989Sglebius		 *    quick ID reuse. This is tolerable in our particular case,
262280989Sglebius		 *    since probability of such event is much lower then reuse
263280989Sglebius		 *    of ID due to legitimate overflow, that at modern Internet
264280989Sglebius		 *    speeds happens all the time.
265280989Sglebius		 * 2) We are relying on the fact that counter(9) is based on
266280989Sglebius		 *    UMA_ZONE_PCPU uma(9) zone. We also take only last
267280989Sglebius		 *    sixteen bits of a counter, so we don't care about the
268280989Sglebius		 *    fact that machines with 32-bit word update their counters
269280989Sglebius		 *    not atomically.
270280989Sglebius		 */
271280971Sglebius		ip->ip_id = htons((*(uint64_t *)zpcpu_get(V_ip_id)) & 0xffff);
272280971Sglebius	}
273280971Sglebius}
274280971Sglebius
275280787Sglebiusstatic void
276280788Sglebiusipid_sysinit(void)
277280787Sglebius{
278302372Snwhitehorn	int i;
279280787Sglebius
280280788Sglebius	mtx_init(&V_ip_id_mtx, "ip_id_mtx", NULL, MTX_DEF);
281280971Sglebius	V_ip_id = counter_u64_alloc(M_WAITOK);
282302372Snwhitehorn
283302372Snwhitehorn	CPU_FOREACH(i)
284280971Sglebius		arc4rand(zpcpu_get_cpu(V_ip_id, i), sizeof(uint64_t), 0);
285280787Sglebius}
286280788SglebiusVNET_SYSINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysinit, NULL);
287280788Sglebius
288280788Sglebiusstatic void
289280788Sglebiusipid_sysuninit(void)
290280788Sglebius{
291280788Sglebius
292280971Sglebius	if (V_id_array != NULL) {
293280971Sglebius		free(V_id_array, M_IPID);
294280971Sglebius		free(V_id_bits, M_IPID);
295280971Sglebius	}
296280971Sglebius	counter_u64_free(V_ip_id);
297301504Sbz	mtx_destroy(&V_ip_id_mtx);
298280788Sglebius}
299302054SbzVNET_SYSUNINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ipid_sysuninit, NULL);
300