ip_stack.h revision 8348:4137e18bfaf0
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#ifndef	_INET_IP_STACK_H
28#define	_INET_IP_STACK_H
29
30#ifdef	__cplusplus
31extern "C" {
32#endif
33
34#include <sys/netstack.h>
35#include <netinet/igmp_var.h>
36
37#ifdef _KERNEL
38#include <sys/list.h>
39
40/*
41 * IP statistics.
 *
 * IP_STAT(ipst, x) adds one to the 64-bit value of counter x in
 * ipst->ips_ip_statistics; IP_STAT_UPDATE(ipst, x, n) adds n to it.
 * x is pasted in as a member name, so it must name a field of ip_stat_t.
 * Both expand to a plain ++ / += on the field; nothing in the macros
 * themselves serializes concurrent updates.
42 */
43#define	IP_STAT(ipst, x)	((ipst)->ips_ip_statistics.x.value.ui64++)
44#define	IP_STAT_UPDATE(ipst, x, n) \
45		((ipst)->ips_ip_statistics.x.value.ui64 += (n))
46
/*
 * Named kstat counters for IP, one kstat_named_t per counted event.
 * struct ip_stack embeds one of these as ips_ip_statistics, and the
 * IP_STAT() / IP_STAT_UPDATE() macros update the members by name.
 */
47typedef struct ip_stat {
48	kstat_named_t	ipsec_fanout_proto;
49	kstat_named_t	ip_udp_fannorm;
50	kstat_named_t	ip_udp_fanmb;
51	kstat_named_t	ip_udp_fanothers;
52	kstat_named_t	ip_udp_fast_path;
53	kstat_named_t	ip_udp_slow_path;
54	kstat_named_t	ip_udp_input_err;
55	kstat_named_t	ip_tcppullup;
56	kstat_named_t	ip_tcpoptions;
57	kstat_named_t	ip_multipkttcp;
58	kstat_named_t	ip_tcp_fast_path;
59	kstat_named_t	ip_tcp_slow_path;
60	kstat_named_t	ip_tcp_input_error;
61	kstat_named_t	ip_db_ref;
62	kstat_named_t	ip_notaligned1;
63	kstat_named_t	ip_notaligned2;
64	kstat_named_t	ip_multimblk3;
65	kstat_named_t	ip_multimblk4;
66	kstat_named_t	ip_ipoptions;
67	kstat_named_t	ip_classify_fail;
68	kstat_named_t	ip_opt;
69	kstat_named_t	ip_udp_rput_local;
70	kstat_named_t	ipsec_proto_ahesp;
71	kstat_named_t	ip_conn_flputbq;
72	kstat_named_t	ip_conn_walk_drain;
73	kstat_named_t   ip_out_sw_cksum;
74	kstat_named_t   ip_in_sw_cksum;
75	kstat_named_t   ip_trash_ire_reclaim_calls;
76	kstat_named_t   ip_trash_ire_reclaim_success;
77	kstat_named_t   ip_ire_arp_timer_expired;
78	kstat_named_t   ip_ire_redirect_timer_expired;
79	kstat_named_t	ip_ire_pmtu_timer_expired;
80	kstat_named_t	ip_input_multi_squeue;
81	kstat_named_t	ip_tcp_in_full_hw_cksum_err;
82	kstat_named_t	ip_tcp_in_part_hw_cksum_err;
83	kstat_named_t	ip_tcp_in_sw_cksum_err;
84	kstat_named_t	ip_tcp_out_sw_cksum_bytes;
85	kstat_named_t	ip_udp_in_full_hw_cksum_err;
86	kstat_named_t	ip_udp_in_part_hw_cksum_err;
87	kstat_named_t	ip_udp_in_sw_cksum_err;
88	kstat_named_t	ip_udp_out_sw_cksum_bytes;
89	kstat_named_t	ip_frag_mdt_pkt_out;
90	kstat_named_t	ip_frag_mdt_discarded;
91	kstat_named_t	ip_frag_mdt_allocfail;
92	kstat_named_t	ip_frag_mdt_addpdescfail;
93	kstat_named_t	ip_frag_mdt_allocd;
94} ip_stat_t;
95
96
97/*
98 * IP6 statistics.
 *
 * IP6_STAT(ipst, x) adds one to the 64-bit value of counter x in
 * ipst->ips_ip6_statistics; IP6_STAT_UPDATE(ipst, x, n) adds n to it.
 * x is pasted in as a member name, so it must name a field of ip6_stat_t.
 * Both expand to a plain ++ / += on the field; nothing in the macros
 * themselves serializes concurrent updates.
99 */
100#define	IP6_STAT(ipst, x)	((ipst)->ips_ip6_statistics.x.value.ui64++)
101#define	IP6_STAT_UPDATE(ipst, x, n)	\
102	((ipst)->ips_ip6_statistics.x.value.ui64 += (n))
103
/*
 * Named kstat counters for IP6, one kstat_named_t per counted event.
 * struct ip_stack embeds one of these as ips_ip6_statistics, and the
 * IP6_STAT() / IP6_STAT_UPDATE() macros update the members by name.
 */
104typedef struct ip6_stat {
105	kstat_named_t	ip6_udp_fast_path;
106	kstat_named_t	ip6_udp_slow_path;
107	kstat_named_t	ip6_udp_fannorm;
108	kstat_named_t	ip6_udp_fanmb;
109	kstat_named_t   ip6_out_sw_cksum;
110	kstat_named_t   ip6_in_sw_cksum;
111	kstat_named_t	ip6_tcp_in_full_hw_cksum_err;
112	kstat_named_t	ip6_tcp_in_part_hw_cksum_err;
113	kstat_named_t	ip6_tcp_in_sw_cksum_err;
114	kstat_named_t	ip6_tcp_out_sw_cksum_bytes;
115	kstat_named_t	ip6_udp_in_full_hw_cksum_err;
116	kstat_named_t	ip6_udp_in_part_hw_cksum_err;
117	kstat_named_t	ip6_udp_in_sw_cksum_err;
118	kstat_named_t	ip6_udp_out_sw_cksum_bytes;
119	kstat_named_t	ip6_frag_mdt_pkt_out;
120	kstat_named_t	ip6_frag_mdt_discarded;
121	kstat_named_t	ip6_frag_mdt_allocfail;
122	kstat_named_t	ip6_frag_mdt_addpdescfail;
123	kstat_named_t	ip6_frag_mdt_allocd;
124} ip6_stat_t;
125
/*
 * Running counts of ire allocation/free and bucket insert/delete events.
 * struct ip_stack keeps one copy for IPv4 (ips_ire_stats_v4) and one for
 * IPv6 (ips_ire_stats_v6).
 */
126typedef struct ire_stats {
127	uint64_t ire_stats_alloced;	/* # of ires allocated */
128	uint64_t ire_stats_freed;	/* # of ires freed */
129	uint64_t ire_stats_inserted;	/* # of ires inserted in the bucket */
130	uint64_t ire_stats_deleted;	/* # of ires deleted from the bucket */
131} ire_stats_t;
132
133
134/*
135 * IP stack instances
 *
 * Gathers the IP module's state in one structure: MIB and kstat data,
 * global locks, classifier fanout tables, IGMP/MLD timer state, IRE
 * tables, address selection policy, multicast routing state and the
 * packet hook events. ips_netstack refers to the common netstack_t;
 * CONNQ_TO_IPST() reaches an instance through that netstack's
 * netstack_ip member.
 *
 * The "ip.c", "ipclassifier.c" and "ip_neti.c" markers below group the
 * members by the source file they are associated with.
136 */
137struct ip_stack {
138	netstack_t	*ips_netstack;	/* Common netstack */
139
140	struct ipparam_s	*ips_param_arr; 	/* ndd variable table */
141	struct ipndp_s		*ips_ndp_arr;
142
143	mib2_ipIfStatsEntry_t	ips_ip_mib;	/* SNMP fixed size info */
144	mib2_icmp_t	ips_icmp_mib;
145	/*
146	 * IPv6 mibs when the interface (ill) is not known.
147	 * When the ill is known the per-interface mib in the ill is used.
148	 */
149	mib2_ipIfStatsEntry_t	ips_ip6_mib;
150	mib2_ipv6IfIcmpEntry_t	ips_icmp6_mib;
151
152	struct igmpstat		ips_igmpstat;
153
154	kstat_t		*ips_ip_mibkp;	/* kstat exporting ip_mib data */
155	kstat_t		*ips_icmp_mibkp; /* kstat exporting icmp_mib data */
156	kstat_t		*ips_ip_kstat;
157	ip_stat_t	ips_ip_statistics;
158	kstat_t		*ips_ip6_kstat;
159	ip6_stat_t	ips_ip6_statistics;
160
161/* ip.c */
162	krwlock_t	ips_ip_g_nd_lock;
163	kmutex_t	ips_igmp_timer_lock;
164	kmutex_t	ips_mld_timer_lock;
165	kmutex_t	ips_ip_mi_lock;
166	kmutex_t	ips_ip_addr_avail_lock;
167	krwlock_t	ips_ill_g_lock;
168	krwlock_t	ips_ipsec_capab_ills_lock;
169				/* protects the list of IPsec capable ills */
170	struct ipsec_capab_ill_s *ips_ipsec_capab_ills_ah;
171	struct ipsec_capab_ill_s *ips_ipsec_capab_ills_esp;
172
173	krwlock_t	ips_ill_g_usesrc_lock;
174
175	struct ill_group *ips_illgrp_head_v4;	/* Head of IPv4 ill groups */
176	struct ill_group *ips_illgrp_head_v6;	/* Head of IPv6 ill groups */
177
178	/* Taskq dispatcher for capability operations */
179	kmutex_t	ips_capab_taskq_lock;
180	kcondvar_t	ips_capab_taskq_cv;
181	list_t		ips_capab_taskq_list;
182	kthread_t	*ips_capab_taskq_thread;
183	boolean_t	ips_capab_taskq_quit;
184
185/* ipclassifier.c - keep in ip_stack_t */
186	/* ipclassifier hash tables */
187	struct connf_s	*ips_rts_clients;
188	struct connf_s	*ips_ipcl_conn_fanout;
189	struct connf_s	*ips_ipcl_bind_fanout;
190	struct connf_s	*ips_ipcl_proto_fanout;
191	struct connf_s	*ips_ipcl_proto_fanout_v6;
192	struct connf_s	*ips_ipcl_udp_fanout;
193	struct connf_s	*ips_ipcl_raw_fanout;
194	uint_t		ips_ipcl_conn_fanout_size;
195	uint_t		ips_ipcl_bind_fanout_size;
196	uint_t		ips_ipcl_udp_fanout_size;
197	uint_t		ips_ipcl_raw_fanout_size;
198	struct connf_s	*ips_ipcl_globalhash_fanout;
199	int		ips_conn_g_index;
200
201/* ip.c */
202	/* Following protected by ips_igmp_timer_lock */
	/*
	 * NOTE(review): the member is named "time to next" but its comment
	 * says "time since last timeout" -- confirm which is meant.
	 */
203	int 		ips_igmp_time_to_next;	/* Time since last timeout */
204	int 		ips_igmp_timer_scheduled_last;
205	int		ips_igmp_deferred_next;
206	timeout_id_t	ips_igmp_timeout_id;
207	kthread_t	*ips_igmp_timer_thread;
208	boolean_t	ips_igmp_timer_setter_active;
209
210	/* Following protected by ips_mld_timer_lock */
	/*
	 * NOTE(review): the member is named "time to next" but its comment
	 * says "time since last timeout" -- confirm which is meant.
	 */
211	int 		ips_mld_time_to_next;	/* Time since last timeout */
212	int 		ips_mld_timer_scheduled_last;
213	int		ips_mld_deferred_next;
214	timeout_id_t	ips_mld_timeout_id;
215	kthread_t	*ips_mld_timer_thread;
216	boolean_t	ips_mld_timer_setter_active;
217
218	/* Protected by ips_igmp_slowtimeout_lock */
219	timeout_id_t	ips_igmp_slowtimeout_id;
220	kmutex_t	ips_igmp_slowtimeout_lock;
221
222	/* Protected by ips_mld_slowtimeout_lock */
223	timeout_id_t	ips_mld_slowtimeout_id;
224	kmutex_t	ips_mld_slowtimeout_lock;
225
226	/* IPv4 forwarding table */
227	struct radix_node_head *ips_ip_ftable;
228
229	/* This is dynamically allocated in ip_ire_init */
230	struct irb	 *ips_ip_cache_table;
231
232#define	IPV6_ABITS		128
233#define	IP6_MASK_TABLE_SIZE	(IPV6_ABITS + 1)	/* 129 ptrs */
234
235	struct irb	*ips_ip_forwarding_table_v6[IP6_MASK_TABLE_SIZE];
236	/* This is dynamically allocated in ip_ire_init */
237	struct irb	*ips_ip_cache_table_v6;
238
239	uint32_t	ips_ire_handle;
240	/*
241	 * ire_ft_init_lock is used while initializing ip_forwarding_table
242	 * dynamically in ire_add.
243	 */
244	kmutex_t	ips_ire_ft_init_lock;
245	kmutex_t	ips_ire_handle_lock;	/* Protects ire_handle */
246
247	uint32_t	ips_ip_cache_table_size;
248	uint32_t	ips_ip6_cache_table_size;
249	uint32_t	ips_ip6_ftable_hash_size;
250
251	ire_stats_t 	ips_ire_stats_v4;	/* IPv4 ire statistics */
252	ire_stats_t 	ips_ire_stats_v6;	/* IPv6 ire statistics */
253
254	/* pending binds */
255	mblk_t		*ips_ip6_asp_pending_ops;
256	mblk_t		*ips_ip6_asp_pending_ops_tail;
257
258	/* Synchronize updates with table usage */
259	mblk_t		*ips_ip6_asp_pending_update; /* pending table updates */
260
261	boolean_t	ips_ip6_asp_uip;	/* table update in progress */
262	kmutex_t	ips_ip6_asp_lock;	/* protect all the above */
263	uint32_t	ips_ip6_asp_refcnt;	/* outstanding references */
264
265	struct ip6_asp	*ips_ip6_asp_table;
266	/* The number of policy entries in the table */
267	uint_t		ips_ip6_asp_table_count;
268
269	int		ips_ip_g_forward;
270	int		ips_ipv6_forward;
271
272	int		ips_ipmp_hook_emulation; /* ndd variable */
273
274	time_t		ips_ip_g_frag_timeout;
275	clock_t		ips_ip_g_frag_timo_ms;
276
277	struct conn_s	*ips_ip_g_mrouter;
278
279	/* Time since last icmp_pkt_err */
280	clock_t		ips_icmp_pkt_err_last;
281	/* Number of packets sent in burst */
282	uint_t		ips_icmp_pkt_err_sent;
283	/* Used by icmp_send_redirect_v6 for picking random src. */
284	uint_t		ips_icmp_redirect_v6_src_index;
285
286	/* Protected by ips_ip_mi_lock */
287	void		*ips_ip_g_head;		/* Instance Data List Head */
288
289	caddr_t		ips_ip_g_nd;		/* Named Dispatch List Head */
290
291	/* Multirouting stuff */
292	/* Interval (in ms) between consecutive 'bad MTU' warnings */
293	hrtime_t	ips_ip_multirt_log_interval;
294	/* Time since last warning issued. */
295	hrtime_t	ips_multirt_bad_mtu_last_time;
296
297	struct cgtp_filter_ops *ips_ip_cgtp_filter_ops;	/* CGTP hooks */
298	boolean_t	ips_ip_cgtp_filter;	/* Enable/disable CGTP hooks */
299
300	kmutex_t	ips_ip_trash_timer_lock;
301	timeout_id_t	ips_ip_ire_expire_id;	/* IRE expiration timer. */
302	struct ipsq_s	*ips_ipsq_g_head;
303	uint_t		ips_ill_index;	/* Used to assign interface indices */
304	/* When set search for unused index */
305	boolean_t	ips_ill_index_wrap;
306
307	clock_t		ips_ip_ire_arp_time_elapsed;
308			/* Time since IRE cache last flushed */
309	clock_t		ips_ip_ire_rd_time_elapsed;
310			/* ... redirect IREs last flushed */
311	clock_t		ips_ip_ire_pmtu_time_elapsed;
312			/* Time since path mtu increase */
313
314	uint_t		ips_ip_redirect_cnt;
315			/* Num of redirect routes in ftable */
316	uint_t		ips_ipv6_ire_default_count;
317			/* Number of IPv6 IRE_DEFAULT entries */
318	uint_t		ips_ipv6_ire_default_index;
319			/* Walking IPv6 index used to mod in */
320
321	uint_t		ips_loopback_packets;
322
323	/* NDP/NCE structures for IPv4 and IPv6 */
324	struct ndp_g_s	*ips_ndp4;
325	struct ndp_g_s	*ips_ndp6;
326
327	/* ip_mroute stuff */
328	kmutex_t	ips_ip_g_mrouter_mutex;
329
330	struct mrtstat	*ips_mrtstat;	/* Stats for netstat */
331	int		ips_saved_ip_g_forward;
332
333	/* numvifs is only a hint about the max interface being used. */
334	ushort_t	ips_numvifs;
335	kmutex_t	ips_numvifs_mutex;
336
337	struct vif	*ips_vifs;
338	struct mfcb	*ips_mfcs;	/* kernel routing table	*/
339	struct tbf	*ips_tbfs;
340	/*
341	 * One-back cache used to locate a tunnel's vif,
342	 * given a datagram's src ip address.
343	 */
344	ipaddr_t	ips_last_encap_src;
345	struct vif	*ips_last_encap_vif;
346	kmutex_t	ips_last_encap_lock;	/* Protects the above */
347
348	/*
349	 * reg_vif_num is protected by numvifs_mutex
350	 */
351	/* ips_pim_assert (below): whether special PIM assert processing is on. */
352	ushort_t	ips_reg_vif_num; 	/* Index to Register vif */
353	int		ips_pim_assert;
354
355	union ill_g_head_u *ips_ill_g_heads;   /* ILL List Head */
356
357	kstat_t		*ips_loopback_ksp;
358
359	uint_t		ips_ipif_src_random;
360
361	struct idl_s	*ips_conn_drain_list;	/* Array of conn drain lists */
362	uint_t		ips_conn_drain_list_cnt; /* Count of conn_drain_list */
363	int		ips_conn_drain_list_index; /* Next drain_list */
364
365	/*
366	 * ID used to assign next free one.
367	 * Increases by one. Once it wraps we search for an unused ID.
368	 */
369	uint_t		ips_ip_src_id;
370	boolean_t	ips_srcid_wrapped;
371
372	struct srcid_map *ips_srcid_head;
373	krwlock_t	ips_srcid_lock;
374
375	uint64_t	ips_ipif_g_seqid;
376	union phyint_list_u *ips_phyint_g_list;	/* start of phyint list */
377
378	/*
379	 * Reflects value of FAILBACK variable in IPMP config file
380	 * /etc/default/mpathd. Default value is B_TRUE.
381	 * Set to B_FALSE if user disabled failback by configuring
382	 * "FAILBACK=no". in.mpathd uses the SIOCSIPMPFAILBACK ioctl to pass
383	 * this information to kernel.
384	 */
385	boolean_t ips_ipmp_enable_failback;
386
387/* ip_neti.c */
388	hook_family_t	ips_ipv4root;
389	hook_family_t	ips_ipv6root;
390
391	/*
392	 * Hooks for firewalling
393	 */
394	hook_event_t		ips_ip4_physical_in_event;
395	hook_event_t		ips_ip4_physical_out_event;
396	hook_event_t		ips_ip4_forwarding_event;
397	hook_event_t		ips_ip4_loopback_in_event;
398	hook_event_t		ips_ip4_loopback_out_event;
399	hook_event_t		ips_ip4_nic_events;
400	hook_event_t		ips_ip6_physical_in_event;
401	hook_event_t		ips_ip6_physical_out_event;
402	hook_event_t		ips_ip6_forwarding_event;
403	hook_event_t		ips_ip6_loopback_in_event;
404	hook_event_t		ips_ip6_loopback_out_event;
405	hook_event_t		ips_ip6_nic_events;
406
407	hook_event_token_t	ips_ipv4firewall_physical_in;
408	hook_event_token_t	ips_ipv4firewall_physical_out;
409	hook_event_token_t	ips_ipv4firewall_forwarding;
410	hook_event_token_t	ips_ipv4firewall_loopback_in;
411	hook_event_token_t	ips_ipv4firewall_loopback_out;
412	hook_event_token_t	ips_ipv4nicevents;
413	hook_event_token_t	ips_ipv6firewall_physical_in;
414	hook_event_token_t	ips_ipv6firewall_physical_out;
415	hook_event_token_t	ips_ipv6firewall_forwarding;
416	hook_event_token_t	ips_ipv6firewall_loopback_in;
417	hook_event_token_t	ips_ipv6firewall_loopback_out;
418	hook_event_token_t	ips_ipv6nicevents;
419
420	net_handle_t		ips_ipv4_net_data;
421	net_handle_t		ips_ipv6_net_data;
422
423	boolean_t		ips_ipobs_enabled;
424	list_t			ips_ipobs_cb_list;
425	kmutex_t		ips_ipobs_cb_lock;
426	uint_t			ips_ipobs_cb_nwalkers;
427	kcondvar_t		ips_ipobs_cb_cv;
428
429	struct __ldi_ident	*ips_ldi_ident;
430};
431typedef struct ip_stack ip_stack_t;
432
433/*
 * Finding an ip_stack_t
 *
 * CONNQ_TO_IPST(_q): maps queue _q to its conn with Q_TO_CONN(), then
 * follows conn_netstack->netstack_ip.
 * ILLQ_TO_IPST(_q): treats _q->q_ptr as an ill_t * and yields its
 * ill_ipst member.
 * Neither macro checks any pointer along the way for NULL.
 */
434#define	CONNQ_TO_IPST(_q)	(Q_TO_CONN(_q)->conn_netstack->netstack_ip)
435#define	ILLQ_TO_IPST(_q)	(((ill_t *)(_q)->q_ptr)->ill_ipst)
436
437#else /* _KERNEL */
/*
 * Without _KERNEL the real struct ip_stack is not defined; ip_stack_t is
 * only a placeholder type so the name still exists.
 */
438typedef int ip_stack_t;
439#endif /* _KERNEL */
440
441#ifdef	__cplusplus
442}
443#endif
444
445#endif	/* _INET_IP_STACK_H */
446