1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24/* Copyright (c) 1990 Mentat Inc. */
25
26#include <inet/ip.h>
27#include <inet/tcp_impl.h>
28#include <sys/multidata.h>
29#include <sys/sunddi.h>
30
/*
 * Max size IP datagram is 64k - 1.  The largest MSS for each address family
 * is what remains of IP_MAXPACKET after subtracting the IP header
 * (ipha_t for IPv4, ip6_t for IPv6) and the TCP header (tcpha_t).
 */
#define	TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
#define	TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))

/*
 * Max of the above.
 * NOTE(review): this relies on sizeof (ipha_t) <= sizeof (ip6_t), so that
 * the IPv4 value is the larger of the two -- confirm against the headers.
 */
#define	TCP_MSS_MAX		TCP_MSS_MAX_IPV4

/*
 * Transmit/receive water mark constants.  They are referenced by the
 * "send_maxbuf", "_xmit_lowat" and "recv_maxbuf" entries of
 * tcp_propinfo_tbl below.
 */
#define	TCP_XMIT_LOWATER	4096
#define	TCP_XMIT_HIWATER	49152
#define	TCP_RECV_LOWATER	2048
#define	TCP_RECV_HIWATER	128000
42
43/*
44 * Set the RFC 1948 pass phrase
45 */
46/* ARGSUSED */
47static int
48tcp_set_1948phrase(void *cbarg,  cred_t *cr, mod_prop_info_t *pinfo,
49    const char *ifname, const void* pr_val, uint_t flags)
50{
51	tcp_stack_t	*tcps = (tcp_stack_t *)cbarg;
52
53	if (flags & MOD_PROP_DEFAULT)
54		return (ENOTSUP);
55
56	/*
57	 * Basically, value contains a new pass phrase.  Pass it along!
58	 */
59	tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val), tcps);
60	return (0);
61}
62
63/*
64 * returns the current list of listener limit configuration.
65 */
66/* ARGSUSED */
67static int
68tcp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname,
69    void *val, uint_t psize, uint_t flags)
70{
71	tcp_stack_t	*tcps = (tcp_stack_t *)cbarg;
72	tcp_listener_t	*tl;
73	char		*pval = val;
74	size_t		nbytes = 0, tbytes = 0;
75	uint_t		size;
76	int		err = 0;
77
78	bzero(pval, psize);
79	size = psize;
80
81	if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE))
82		return (0);
83
84	mutex_enter(&tcps->tcps_listener_conf_lock);
85	for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
86	    tl = list_next(&tcps->tcps_listener_conf, tl)) {
87		if (psize == size)
88			nbytes = snprintf(pval, size, "%d:%d",  tl->tl_port,
89			    tl->tl_ratio);
90		else
91			nbytes = snprintf(pval, size, ",%d:%d",  tl->tl_port,
92			    tl->tl_ratio);
93		size -= nbytes;
94		pval += nbytes;
95		tbytes += nbytes;
96		if (tbytes >= psize) {
97			/* Buffer overflow, stop copying information */
98			err = ENOBUFS;
99			break;
100		}
101	}
102
103	mutex_exit(&tcps->tcps_listener_conf_lock);
104	return (err);
105}
106
107/*
108 * add a new listener limit configuration.
109 */
110/* ARGSUSED */
111static int
112tcp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
113    const char *ifname, const void* pval, uint_t flags)
114{
115	tcp_listener_t	*new_tl;
116	tcp_listener_t	*tl;
117	long		lport;
118	long		ratio;
119	char		*colon;
120	tcp_stack_t	*tcps = (tcp_stack_t *)cbarg;
121
122	if (flags & MOD_PROP_DEFAULT)
123		return (ENOTSUP);
124
125	if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 ||
126	    lport > USHRT_MAX || *colon != ':') {
127		return (EINVAL);
128	}
129	if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0)
130		return (EINVAL);
131
132	mutex_enter(&tcps->tcps_listener_conf_lock);
133	for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
134	    tl = list_next(&tcps->tcps_listener_conf, tl)) {
135		/* There is an existing entry, so update its ratio value. */
136		if (tl->tl_port == lport) {
137			tl->tl_ratio = ratio;
138			mutex_exit(&tcps->tcps_listener_conf_lock);
139			return (0);
140		}
141	}
142
143	if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) ==
144	    NULL) {
145		mutex_exit(&tcps->tcps_listener_conf_lock);
146		return (ENOMEM);
147	}
148
149	new_tl->tl_port = lport;
150	new_tl->tl_ratio = ratio;
151	list_insert_tail(&tcps->tcps_listener_conf, new_tl);
152	mutex_exit(&tcps->tcps_listener_conf_lock);
153	return (0);
154}
155
156/*
157 * remove a listener limit configuration.
158 */
159/* ARGSUSED */
160static int
161tcp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
162    const char *ifname, const void* pval, uint_t flags)
163{
164	tcp_listener_t	*tl;
165	long		lport;
166	tcp_stack_t	*tcps = (tcp_stack_t *)cbarg;
167
168	if (flags & MOD_PROP_DEFAULT)
169		return (ENOTSUP);
170
171	if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 ||
172	    lport > USHRT_MAX) {
173		return (EINVAL);
174	}
175	mutex_enter(&tcps->tcps_listener_conf_lock);
176	for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
177	    tl = list_next(&tcps->tcps_listener_conf, tl)) {
178		if (tl->tl_port == lport) {
179			list_remove(&tcps->tcps_listener_conf, tl);
180			mutex_exit(&tcps->tcps_listener_conf_lock);
181			kmem_free(tl, sizeof (tcp_listener_t));
182			return (0);
183		}
184	}
185	mutex_exit(&tcps->tcps_listener_conf_lock);
186	return (ESRCH);
187}
188
/*
 * TCP tunable property table.
 *
 * All of these are alterable, within the min/max values given, at run time.
 *
 * Each entry gives the property name, the MOD_PROTO_TCP tag, a "set"
 * callback and a "get" callback (either of which may be NULL), followed by
 * two brace-enclosed groups of values.
 * NOTE(review): the value groups appear to be {min, max, current} (or a
 * single boolean) followed by {default} -- confirm against the definition
 * of mod_prop_info_t.
 *
 * The "tunable - N" comments mark every tenth entry, counting from zero.
 * The table is terminated by an entry whose name is NULL.
 *
 * Note: All those tunables which do not start with "_" are Committed and
 * therefore are public. See PSARC 2010/080.
 */
mod_prop_info_t tcp_propinfo_tbl[] = {
	/* tunable - 0 */
	{ "_time_wait_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} },

	{ "_conn_req_max_q", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, UINT32_MAX, 128}, {128} },

	{ "_conn_req_max_q0", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 1024}, {1024} },

	{ "_conn_req_min", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 1024, 1}, {1} },

	{ "_conn_grace_period", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0*MS, 20*SECONDS, 0*MS}, {0*MS} },

	{ "_cwnd_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {128, (1<<30), 1024*1024}, {1024*1024} },

	{ "_debug", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 10, 0}, {0} },

	{ "smallest_nonpriv_port", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1024, (32*1024), 1024}, {1024} },

	{ "_ip_abort_cinterval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },

	{ "_ip_abort_linterval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },

	/* tunable - 10 */
	{ "_ip_abort_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} },

	{ "_ip_notify_cinterval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, UINT32_MAX, 10*SECONDS},
	    {10*SECONDS} },

	{ "_ip_notify_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} },

	{ "_ipv4_ttl", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 255, 64}, {64} },

	{ "_keepalive_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },

	{ "_maxpsz_multiplier", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 100, 10}, {10} },

	{ "_mss_def_ipv4", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV4, 536}, {536} },

	{ "_mss_max_ipv4", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4},
	    {TCP_MSS_MAX_IPV4} },

	{ "_mss_min", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX, 108}, {108} },

	{ "_naglim_def", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },

	/* tunable - 20 */
	{ "_rexmit_interval_initial", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} },

	{ "_rexmit_interval_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} },

	{ "_rexmit_interval_min", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 2*HOURS, 400*MS}, {400*MS} },

	{ "_deferred_ack_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 1*MINUTES, 100*MS}, {100*MS} },

	{ "_snd_lowat_fraction", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 16, 0}, {0} },

	{ "_dupack_fast_retransmit", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 10000, 3}, {3} },

	{ "_ignore_path_mtu", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "smallest_anon_port", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1024, ULP_MAX_PORT, 32*1024}, {32*1024} },

	{ "largest_anon_port", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1024, ULP_MAX_PORT, ULP_MAX_PORT},
	    {ULP_MAX_PORT} },

	{ "send_maxbuf", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_HIWATER},
	    {TCP_XMIT_HIWATER} },

	/* tunable - 30 */
	{ "_xmit_lowat", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_LOWATER},
	    {TCP_XMIT_LOWATER} },

	{ "recv_maxbuf", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {TCP_RECV_LOWATER, (1<<30), TCP_RECV_HIWATER},
	    {TCP_RECV_HIWATER} },

	{ "_recv_hiwat_minmss", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 65536, 4}, {4} },

	{ "_fin_wait_2_flush_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, 2*HOURS, 60*SECONDS},
	    {60*SECONDS} },

	{ "_max_buf", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {8192, (1<<30), 1024*1024}, {1024*1024} },

	/*
	 * Question:  What default value should I set for tcp_strong_iss?
	 */
	{ "_strong_iss", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 2, 1}, {1} },

	{ "_rtt_updates", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 65536, 20}, {20} },

	{ "_wscale_always", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_TRUE}, {B_TRUE} },

	{ "_tstamp_always", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_tstamp_if_wscale", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_TRUE}, {B_TRUE} },

	/* tunable - 40 */
	{ "_rexmit_interval_extra", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0*MS, 2*HOURS, 0*MS}, {0*MS} },

	{ "_deferred_acks_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 16, 2}, {2} },

	{ "_slow_start_after_idle", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 16384, 4}, {4} },

	{ "_slow_start_initial", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 4, 4}, {4} },

	{ "sack", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 2, 2}, {2} },

	{ "_ipv6_hoplimit", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
	    {IPV6_DEFAULT_HOPS} },

	{ "_mss_def_ipv6", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV6, 1220}, {1220} },

	{ "_mss_max_ipv6", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6},
	    {TCP_MSS_MAX_IPV6} },

	{ "_rev_src_routes", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_local_dack_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {10*MS, 500*MS, 50*MS}, {50*MS} },

	/* tunable - 50 */
	{ "_local_dacks_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 16, 8}, {8} },

	{ "ecn", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 2, 1}, {1} },

	{ "_rst_sent_rate_enabled", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_TRUE}, {B_TRUE} },

	{ "_rst_sent_rate", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 40}, {40} },

	{ "_push_timer_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 100*MS, 50*MS}, {50*MS} },

	{ "_use_smss_as_mss_opt", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_keepalive_abort_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} },

	/*
	 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link
	 * layer header.  It has to be a multiple of 8.
	 */
	{ "_wroff_xtra", MOD_PROTO_TCP,
	    mod_set_aligned, mod_get_uint32,
	    {0, 256, 32}, {32} },

	{ "_dev_flow_ctl", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_reass_timeout", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} },

	/* tunable - 60 */
	{ "extra_priv_ports", MOD_PROTO_TCP,
	    mod_set_extra_privports, mod_get_extra_privports,
	    {1, ULP_MAX_PORT, 0}, {0} },

	/* Set-only: handled by tcp_set_1948phrase(); there is no getter. */
	{ "_1948_phrase", MOD_PROTO_TCP,
	    tcp_set_1948phrase, NULL, {0}, {0} },

	/*
	 * The listener limit configuration is read through
	 * "_listener_limit_conf" (get only) and modified through the
	 * set-only "_add" and "_del" properties that follow it.
	 */
	{ "_listener_limit_conf", MOD_PROTO_TCP,
	    NULL, tcp_listener_conf_get, {0}, {0} },

	{ "_listener_limit_conf_add", MOD_PROTO_TCP,
	    tcp_listener_conf_add, NULL, {0}, {0} },

	{ "_listener_limit_conf_del", MOD_PROTO_TCP,
	    tcp_listener_conf_del, NULL, {0}, {0} },

	/* Get-only entry served by mod_get_allprop. */
	{ "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },

	/* Table terminator: NULL name. */
	{ NULL, 0, NULL, NULL, {0}, {0} }
};
479
/*
 * Element count of tcp_propinfo_tbl, as computed by A_CNT().
 * NOTE(review): presumably this count includes the terminating NULL-name
 * entry of the table -- confirm against the definition of A_CNT.
 */
int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);
481