ip.c revision 3448:aaf16568054b
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/types.h>
29#include <sys/stropts.h>
30#include <sys/stream.h>
31#include <sys/socket.h>
32#include <sys/avl_impl.h>
33#include <net/if.h>
34#include <net/route.h>
35#include <netinet/in.h>
36#include <netinet/ip6.h>
37#include <netinet/udp.h>
38#include <netinet/sctp.h>
39#include <inet/mib2.h>
40#include <inet/common.h>
41#include <inet/ip.h>
42#include <inet/ip_ire.h>
43#include <inet/ip6.h>
44#include <inet/ipclassifier.h>
45#include <inet/mi.h>
46#include <sys/squeue_impl.h>
47
48#include <mdb/mdb_modapi.h>
49#include <mdb/mdb_ks.h>
50
#define	ADDR_WIDTH 11

/*
 * Pairs the symbolic name of a flag bit with a short description of it.
 */
typedef struct {
	const char *bit_name;	/* name of bit */
	const char *bit_descr;	/* description of bit's purpose */
} bitname_t;

/*
 * Descriptions of the bits in an squeue_t's sq_state.  The entry at index
 * i describes bit (1 << i); the verbose output of the squeue dcmd depends
 * on that ordering.  The table is terminated by an entry with a NULL name.
 */
static const bitname_t squeue_states[] = {
	{ "SQS_PROC",		"being processed" },
	{ "SQS_WORKER",		"... by a worker thread" },
	{ "SQS_ENTER",		"... by an squeue_enter() thread" },
	{ "SQS_FAST",		"... in fast-path mode" },
	{ "SQS_USER",		"A non interrupt user" },
	{ "SQS_BOUND",		"worker thread bound to CPU" },
	{ "SQS_PROFILE",	"profiling enabled" },
	{ "SQS_REENTER",	"re-entered thread" },
	{ NULL }
};
69
70typedef struct illif_walk_data {
71	ill_g_head_t ill_g_heads[MAX_G_HEADS];
72	int ill_list;
73	ill_if_t ill_if;
74} illif_walk_data_t;
75
76static int iphdr(uintptr_t, uint_t, int, const mdb_arg_t *);
77static int ip6hdr(uintptr_t, uint_t, int, const mdb_arg_t *);
78
79static int ire_format(uintptr_t addr, const ire_t *irep, uint_t *verbose);
80
81/*
82 * Given the kernel address of an ip_stack_t, return the stackid
83 */
84static int
85ips_to_stackid(uintptr_t kaddr)
86{
87	ip_stack_t ipss;
88	netstack_t nss;
89
90	if (mdb_vread(&ipss, sizeof (ipss), kaddr) == -1) {
91		mdb_warn("failed to read ip_stack_t %p", kaddr);
92		return (0);
93	}
94	kaddr = (uintptr_t)ipss.ips_netstack;
95	if (mdb_vread(&nss, sizeof (nss), kaddr) == -1) {
96		mdb_warn("failed to read netstack_t %p", kaddr);
97		return (0);
98	}
99	return (nss.netstack_stackid);
100}
101
102int
103ip_stacks_walk_init(mdb_walk_state_t *wsp)
104{
105	if (mdb_layered_walk("netstack", wsp) == -1) {
106		mdb_warn("can't walk 'netstack'");
107		return (WALK_ERR);
108	}
109	return (WALK_NEXT);
110}
111
112int
113ip_stacks_walk_step(mdb_walk_state_t *wsp)
114{
115	uintptr_t kaddr;
116	netstack_t nss;
117
118#ifdef DEBUG
119	mdb_printf("DEBUG: ip_stacks_walk_step: addr %p\n", wsp->walk_addr);
120#endif
121	if (mdb_vread(&nss, sizeof (nss), wsp->walk_addr) == -1) {
122		mdb_warn("can't read netstack at %p", wsp->walk_addr);
123		return (WALK_ERR);
124	}
125	kaddr = (uintptr_t)nss.netstack_modules[NS_IP];
126
127#ifdef DEBUG
128	mdb_printf("DEBUG: ip_stacks_walk_step: ip_stack_t at %p\n", kaddr);
129#endif
130	return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata));
131}
132
133/*
134 * Called with walk_addr being the address of ips_ill_g_heads
135 */
136int
137illif_stack_walk_init(mdb_walk_state_t *wsp)
138{
139	illif_walk_data_t *iw;
140
141	if (wsp->walk_addr == NULL) {
142		mdb_warn("illif_stack supports only local walks\n");
143		return (WALK_ERR);
144	}
145
146	iw = mdb_alloc(sizeof (illif_walk_data_t), UM_SLEEP);
147
148	if (mdb_vread(iw->ill_g_heads, MAX_G_HEADS * sizeof (ill_g_head_t),
149	    wsp->walk_addr) == -1) {
150		mdb_warn("failed to read 'ips_ill_g_heads' at %p",
151		    wsp->walk_addr);
152		mdb_free(iw, sizeof (illif_walk_data_t));
153		return (WALK_ERR);
154	}
155
156	iw->ill_list = 0;
157	wsp->walk_addr = (uintptr_t)iw->ill_g_heads[0].ill_g_list_head;
158	wsp->walk_data = iw;
159
160	return (WALK_NEXT);
161}
162
163int
164illif_stack_walk_step(mdb_walk_state_t *wsp)
165{
166	uintptr_t addr = wsp->walk_addr;
167	illif_walk_data_t *iw = wsp->walk_data;
168	int list = iw->ill_list;
169
170	if (mdb_vread(&iw->ill_if, sizeof (ill_if_t), addr) == -1) {
171		mdb_warn("failed to read ill_if_t at %p", addr);
172		return (WALK_ERR);
173	}
174
175	wsp->walk_addr = (uintptr_t)iw->ill_if.illif_next;
176
177	if (wsp->walk_addr ==
178	    (uintptr_t)iw->ill_g_heads[list].ill_g_list_head) {
179
180		if (++list >= MAX_G_HEADS)
181			return (WALK_DONE);
182
183		iw->ill_list = list;
184		wsp->walk_addr =
185		    (uintptr_t)iw->ill_g_heads[list].ill_g_list_head;
186		return (WALK_NEXT);
187	}
188
189	return (wsp->walk_callback(addr, iw, wsp->walk_cbdata));
190}
191
192void
193illif_stack_walk_fini(mdb_walk_state_t *wsp)
194{
195	mdb_free(wsp->walk_data, sizeof (illif_walk_data_t));
196}
197
198typedef struct illif_cbdata {
199	uint_t ill_flags;
200	uintptr_t ill_addr;
201	int ill_printlist;	/* list to be printed (MAX_G_HEADS for all) */
202	boolean_t ill_printed;
203} illif_cbdata_t;
204
205static int
206illif_cb(uintptr_t addr, const illif_walk_data_t *iw, illif_cbdata_t *id)
207{
208	const char *version;
209
210	if (id->ill_printlist < MAX_G_HEADS &&
211	    id->ill_printlist != iw->ill_list)
212		return (WALK_NEXT);
213
214	if (id->ill_flags & DCMD_ADDRSPEC && id->ill_addr != addr)
215		return (WALK_NEXT);
216
217	if (id->ill_flags & DCMD_PIPE_OUT) {
218		mdb_printf("%p\n", addr);
219		return (WALK_NEXT);
220	}
221
222	switch (iw->ill_list) {
223		case IP_V4_G_HEAD:	version = "v4";	break;
224		case IP_V6_G_HEAD:	version = "v6";	break;
225		default:		version = "??"; break;
226	}
227
228	mdb_printf("%?p %2s %?p %10d %?p %s\n",
229	    addr, version, addr + offsetof(ill_if_t, illif_avl_by_ppa),
230	    iw->ill_if.illif_avl_by_ppa.avl_numnodes,
231	    iw->ill_if.illif_ppa_arena, iw->ill_if.illif_name);
232
233	id->ill_printed = TRUE;
234
235	return (WALK_NEXT);
236}
237
238int
239illif_walk_init(mdb_walk_state_t *wsp)
240{
241	if (mdb_layered_walk("ip_stacks", wsp) == -1) {
242		mdb_warn("can't walk 'ip_stacks'");
243		return (WALK_ERR);
244	}
245
246	return (WALK_NEXT);
247}
248
249int
250illif_walk_step(mdb_walk_state_t *wsp)
251{
252	uintptr_t kaddr;
253
254#ifdef DEBUG
255	mdb_printf("DEBUG: illif_walk_step: addr %p\n", wsp->walk_addr);
256#endif
257
258	kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ill_g_heads);
259
260	if (mdb_vread(&kaddr, sizeof (kaddr), kaddr) == -1) {
261		mdb_warn("can't read ips_ip_cache_table at %p", kaddr);
262		return (WALK_ERR);
263	}
264#ifdef DEBUG
265	mdb_printf("DEBUG: illif_walk_step: ips_ill_g_heads %p\n", kaddr);
266#endif
267
268	if (mdb_pwalk("illif_stack", wsp->walk_callback,
269		wsp->walk_cbdata, kaddr) == -1) {
270		mdb_warn("couldn't walk 'illif_stack' for ips_ill_g_heads %p",
271		    kaddr);
272		return (WALK_ERR);
273	}
274	return (WALK_NEXT);
275}
276
277int
278illif(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
279{
280	illif_cbdata_t id;
281	ill_if_t ill_if;
282	const char *opt_P = NULL;
283	int printlist = MAX_G_HEADS;
284
285	if (mdb_getopts(argc, argv,
286	    'P', MDB_OPT_STR, &opt_P, NULL) != argc)
287		return (DCMD_USAGE);
288
289	if (opt_P != NULL) {
290		if (strcmp("v4", opt_P) == 0) {
291			printlist = IP_V4_G_HEAD;
292		} else if (strcmp("v6", opt_P) == 0) {
293			printlist = IP_V6_G_HEAD;
294		} else {
295			mdb_warn("invalid protocol '%s'\n", opt_P);
296			return (DCMD_USAGE);
297		}
298	}
299
300	if (DCMD_HDRSPEC(flags) && (flags & DCMD_PIPE_OUT) == 0) {
301		mdb_printf("%<u>%?s %2s %?s %10s %?s %-10s%</u>\n",
302		    "ADDR", "IP", "AVLADDR", "NUMNODES", "ARENA", "NAME");
303	}
304
305	id.ill_flags = flags;
306	id.ill_addr = addr;
307	id.ill_printlist = printlist;
308	id.ill_printed = FALSE;
309
310	if (mdb_walk("illif", (mdb_walk_cb_t)illif_cb, &id) == -1) {
311		mdb_warn("can't walk ill_if_t structures");
312		return (DCMD_ERR);
313	}
314
315	if (!(flags & DCMD_ADDRSPEC) || opt_P != NULL || id.ill_printed)
316		return (DCMD_OK);
317
318	/*
319	 * If an address is specified and the walk doesn't find it,
320	 * print it anyway.
321	 */
322	if (mdb_vread(&ill_if, sizeof (ill_if_t), addr) == -1) {
323		mdb_warn("failed to read ill_if_t at %p", addr);
324		return (DCMD_ERR);
325	}
326
327	mdb_printf("%?p %2s %?p %10d %?p %s\n",
328	    addr, "??", addr + offsetof(ill_if_t, illif_avl_by_ppa),
329	    ill_if.illif_avl_by_ppa.avl_numnodes,
330	    ill_if.illif_ppa_arena, ill_if.illif_name);
331
332	return (DCMD_OK);
333}
334
/*
 * Help text for the ::illif dcmd.
 */
static void
illif_help(void)
{
	mdb_printf("Options:\n"
	    "\t-P v4 | v6"
	    "\tfilter interface structures for the specified protocol\n");
}
342
343int
344ire_walk_init(mdb_walk_state_t *wsp)
345{
346	if (mdb_layered_walk("ire_cache", wsp) == -1) {
347		mdb_warn("can't walk 'ire_cache'");
348		return (WALK_ERR);
349	}
350
351	return (WALK_NEXT);
352}
353
354int
355ire_walk_step(mdb_walk_state_t *wsp)
356{
357	ire_t ire;
358
359	if (mdb_vread(&ire, sizeof (ire), wsp->walk_addr) == -1) {
360		mdb_warn("can't read ire at %p", wsp->walk_addr);
361		return (WALK_ERR);
362	}
363
364	return (wsp->walk_callback(wsp->walk_addr, &ire, wsp->walk_cbdata));
365}
366
367int
368ire_ctable_walk_init(mdb_walk_state_t *wsp)
369{
370	if (mdb_layered_walk("ip_stacks", wsp) == -1) {
371		mdb_warn("can't walk 'ip_stacks'");
372		return (WALK_ERR);
373	}
374
375	return (WALK_NEXT);
376}
377
378int
379ire_ctable_walk_step(mdb_walk_state_t *wsp)
380{
381	uintptr_t kaddr;
382	irb_t *irb;
383	int verbose = 0;
384	uint32_t cache_table_size;
385	int i;
386
387#ifdef DEBUG
388	mdb_printf("DEBUG: ire_ctable_walk_step: addr %p\n", wsp->walk_addr);
389#endif
390
391	kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ip_cache_table_size);
392
393	if (mdb_vread(&cache_table_size, sizeof (uint32_t), kaddr) == -1) {
394		mdb_warn("can't read ips_ip_cache_table at %p", kaddr);
395		return (WALK_ERR);
396	}
397#ifdef DEBUG
398	mdb_printf("DEBUG: ire_ctable_walk_step: ips_ip_cache_table_size %u\n",
399		cache_table_size);
400#endif
401
402	kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ip_cache_table);
403	if (mdb_vread(&kaddr, sizeof (kaddr), kaddr) == -1) {
404		mdb_warn("can't read ips_ip_cache_table at %p", kaddr);
405		return (WALK_ERR);
406	}
407#ifdef DEBUG
408	mdb_printf("DEBUG: ire_ctable_walk_step: ips_ip_cache_table %p\n",
409	    kaddr);
410#endif
411
412	irb = mdb_alloc(sizeof (irb_t) * cache_table_size, UM_SLEEP|UM_GC);
413	if (mdb_vread(irb, sizeof (irb_t) * cache_table_size, kaddr) == -1) {
414		mdb_warn("can't read irb at %p", kaddr);
415		return (WALK_ERR);
416	}
417	for (i = 0; i < cache_table_size; i++) {
418		kaddr = (uintptr_t)irb[i].irb_ire;
419#ifdef DEBUG
420		mdb_printf("DEBUG: ire_ctable_walk_step: %d ire %p\n",
421		    i, kaddr);
422#endif
423
424		if (mdb_pwalk("ire_next", (mdb_walk_cb_t)ire_format, &verbose,
425			kaddr) == -1) {
426			mdb_warn("can't walk 'ire_next' for ire %p", kaddr);
427			return (WALK_ERR);
428		}
429	}
430	return (WALK_NEXT);
431}
432
433/* ARGSUSED */
434int
435ire_next_walk_init(mdb_walk_state_t *wsp)
436{
437#ifdef DEBUG
438	mdb_printf("DEBUG: ire_next_walk_init: addr %p\n", wsp->walk_addr);
439#endif
440	return (WALK_NEXT);
441}
442
443int
444ire_next_walk_step(mdb_walk_state_t *wsp)
445{
446	ire_t ire;
447	int status;
448
449#ifdef DEBUG
450	mdb_printf("DEBUG: ire_next_walk_step: addr %p\n", wsp->walk_addr);
451#endif
452
453	if (wsp->walk_addr == NULL)
454		return (WALK_DONE);
455
456	if (mdb_vread(&ire, sizeof (ire), wsp->walk_addr) == -1) {
457		mdb_warn("can't read ire at %p", wsp->walk_addr);
458		return (WALK_ERR);
459	}
460	status = wsp->walk_callback(wsp->walk_addr, &ire,
461	    wsp->walk_cbdata);
462
463	if (status != WALK_NEXT)
464		return (status);
465
466	wsp->walk_addr = (uintptr_t)ire.ire_next;
467#ifdef DEBUG
468	mdb_printf("DEBUG: ire_ctable_walk_step: next %p\n", wsp->walk_addr);
469#endif
470	return (status);
471}
472
473static int
474ire_format(uintptr_t addr, const ire_t *irep, uint_t *verbose)
475{
476	static const mdb_bitmask_t tmasks[] = {
477		{ "BROADCAST",	IRE_BROADCAST,		IRE_BROADCAST	},
478		{ "DEFAULT",	IRE_DEFAULT,		IRE_DEFAULT	},
479		{ "LOCAL",	IRE_LOCAL,		IRE_LOCAL	},
480		{ "LOOPBACK",	IRE_LOOPBACK,		IRE_LOOPBACK	},
481		{ "PREFIX",	IRE_PREFIX,		IRE_PREFIX	},
482		{ "CACHE",	IRE_CACHE,		IRE_CACHE	},
483		{ "IF_NORESOLVER", IRE_IF_NORESOLVER,	IRE_IF_NORESOLVER },
484		{ "IF_RESOLVER", IRE_IF_RESOLVER,	IRE_IF_RESOLVER	},
485		{ "HOST",	IRE_HOST,		IRE_HOST	},
486		{ "HOST_REDIRECT", IRE_HOST_REDIRECT,	IRE_HOST_REDIRECT },
487		{ "MIPRTUN",	IRE_MIPRTUN,		IRE_MIPRTUN	},
488		{ NULL,		0,			0		}
489	};
490
491	static const mdb_bitmask_t mmasks[] = {
492		{ "CONDEMNED",	IRE_MARK_CONDEMNED,	IRE_MARK_CONDEMNED },
493		{ "NORECV",	IRE_MARK_NORECV,	IRE_MARK_NORECV	},
494		{ "HIDDEN",	IRE_MARK_HIDDEN,	IRE_MARK_HIDDEN	},
495		{ "NOADD",	IRE_MARK_NOADD,		IRE_MARK_NOADD	},
496		{ "TEMPORARY",	IRE_MARK_TEMPORARY,	IRE_MARK_TEMPORARY },
497		{ NULL,		0,			0		}
498	};
499
500	static const mdb_bitmask_t fmasks[] = {
501		{ "UP",		RTF_UP,			RTF_UP		},
502		{ "GATEWAY",	RTF_GATEWAY,		RTF_GATEWAY	},
503		{ "HOST",	RTF_HOST,		RTF_HOST	},
504		{ "REJECT",	RTF_REJECT,		RTF_REJECT	},
505		{ "DYNAMIC",	RTF_DYNAMIC,		RTF_DYNAMIC	},
506		{ "MODIFIED",	RTF_MODIFIED,		RTF_MODIFIED	},
507		{ "DONE",	RTF_DONE,		RTF_DONE	},
508		{ "MASK",	RTF_MASK,		RTF_MASK	},
509		{ "CLONING",	RTF_CLONING,		RTF_CLONING	},
510		{ "XRESOLVE",	RTF_XRESOLVE,		RTF_XRESOLVE	},
511		{ "LLINFO",	RTF_LLINFO,		RTF_LLINFO	},
512		{ "STATIC",	RTF_STATIC,		RTF_STATIC	},
513		{ "BLACKHOLE",	RTF_BLACKHOLE,		RTF_BLACKHOLE	},
514		{ "PRIVATE",	RTF_PRIVATE,		RTF_PRIVATE	},
515		{ "PROTO2",	RTF_PROTO2,		RTF_PROTO2	},
516		{ "PROTO1",	RTF_PROTO1,		RTF_PROTO1	},
517		{ "MULTIRT",	RTF_MULTIRT,		RTF_MULTIRT	},
518		{ "SETSRC",	RTF_SETSRC,		RTF_SETSRC	},
519		{ NULL,		0,			0		}
520	};
521
522	if (irep->ire_ipversion == 6 && *verbose) {
523
524		mdb_printf("%<b>%?p%</b> %40N <%hb>\n"
525		    "%?s %40N <%hb>\n"
526		    "%?s %40d %4d <%hb>\n",
527		    addr, &irep->ire_src_addr_v6, irep->ire_type, tmasks,
528		    "", &irep->ire_addr_v6, (ushort_t)irep->ire_marks, mmasks,
529		    "", ips_to_stackid((uintptr_t)irep->ire_ipst),
530		    irep->ire_zoneid,
531		    irep->ire_flags, fmasks);
532
533	} else if (irep->ire_ipversion == 6) {
534
535		mdb_printf("%?p %30N %30N %5d %4d\n",
536		    addr, &irep->ire_src_addr_v6,
537		    &irep->ire_addr_v6,
538		    ips_to_stackid((uintptr_t)irep->ire_ipst),
539		    irep->ire_zoneid);
540
541	} else if (*verbose) {
542
543		mdb_printf("%<b>%?p%</b> %40I <%hb>\n"
544		    "%?s %40I <%hb>\n"
545		    "%?s %40d <%hb>\n",
546		    addr, irep->ire_src_addr, irep->ire_type, tmasks,
547		    "", irep->ire_addr, (ushort_t)irep->ire_marks, mmasks,
548		    "", ips_to_stackid((uintptr_t)irep->ire_ipst),
549		    irep->ire_zoneid, irep->ire_flags, fmasks);
550
551	} else {
552
553		mdb_printf("%?p %30I %30I %5d %4d\n", addr, irep->ire_src_addr,
554		    irep->ire_addr, ips_to_stackid((uintptr_t)irep->ire_ipst),
555		    irep->ire_zoneid);
556	}
557
558	return (WALK_NEXT);
559}
560
/*
 * Compute the 16-bit one's-complement checksum of len bytes starting at p.
 *
 * The data is summed as 16-bit words in the byte order they have in
 * memory, so the result is in that same order.  An odd trailing byte is
 * treated as the first byte of a word whose second byte is zero.  A len
 * of zero or less yields the checksum of no data, 0xFFFF.
 *
 * There are faster ways to do this.  Given the interactive nature of this
 * use it is not worth much effort.
 */
static unsigned short
ipcksum(void *p, int len)
{
	const unsigned char *bp = p;
	/*
	 * Words are assembled bytewise through a union so that p needs no
	 * particular alignment.
	 */
	union {
		uint16_t	w;
		unsigned char	b[2];
	} u;
	/* Unsigned, so that setting bit 31 is well defined and folds right. */
	uint32_t sum = 0;

	while (len > 1) {
		u.b[0] = bp[0];
		u.b[1] = bp[1];
		sum += u.w;
		bp += sizeof (uint16_t);
		/* Fold early so the accumulator can never wrap. */
		if (sum & 0x80000000U)
			sum = (sum & 0xFFFF) + (sum >> 16);
		len -= 2;
	}

	if (len == 1) {
		u.b[0] = bp[0];
		u.b[1] = 0;
		sum += u.w;
	}

	while (sum >> 16)
		sum = (sum & 0xFFFF) + (sum >> 16);

	return ((unsigned short)(~sum & 0xFFFF));
}
587
588static const mdb_bitmask_t tcp_flags[] = {
589	{ "SYN",	TH_SYN,		TH_SYN	},
590	{ "ACK",	TH_ACK,		TH_ACK	},
591	{ "FIN",	TH_FIN,		TH_FIN	},
592	{ "RST",	TH_RST,		TH_RST	},
593	{ "PSH",	TH_PUSH,	TH_PUSH	},
594	{ "ECE",	TH_ECE,		TH_ECE	},
595	{ "CWR",	TH_CWR,		TH_CWR	},
596	{ NULL,		0,		0	}
597};
598
599static void
600tcphdr_print(struct tcphdr *tcph)
601{
602	in_port_t	sport, dport;
603	tcp_seq		seq, ack;
604	uint16_t	win, urp;
605
606	mdb_printf("%<b>TCP header%</b>\n");
607
608	mdb_nhconvert(&sport, &tcph->th_sport, sizeof (sport));
609	mdb_nhconvert(&dport, &tcph->th_dport, sizeof (dport));
610	mdb_nhconvert(&seq, &tcph->th_seq, sizeof (seq));
611	mdb_nhconvert(&ack, &tcph->th_ack, sizeof (ack));
612	mdb_nhconvert(&win, &tcph->th_win, sizeof (win));
613	mdb_nhconvert(&urp, &tcph->th_urp, sizeof (urp));
614
615	mdb_printf("%<u>%6s %6s %10s %10s %4s %5s %5s %5s %-15s%</u>\n",
616	    "SPORT", "DPORT", "SEQ", "ACK", "HLEN", "WIN", "CSUM", "URP",
617	    "FLAGS");
618	mdb_printf("%6hu %6hu %10u %10u %4d %5hu %5hu %5hu <%b>\n",
619	    sport, dport, seq, ack, tcph->th_off << 2, win,
620	    tcph->th_sum, urp, tcph->th_flags, tcp_flags);
621	mdb_printf("0x%04x 0x%04x 0x%08x 0x%08x\n\n",
622	    sport, dport, seq, ack);
623}
624
625/* ARGSUSED */
626static int
627tcphdr(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *av)
628{
629	struct tcphdr	tcph;
630
631	if (!(flags & DCMD_ADDRSPEC))
632		return (DCMD_USAGE);
633
634	if (mdb_vread(&tcph, sizeof (tcph), addr) == -1) {
635		mdb_warn("failed to read TCP header at %p", addr);
636		return (DCMD_ERR);
637	}
638	tcphdr_print(&tcph);
639	return (DCMD_OK);
640}
641
642static void
643udphdr_print(struct udphdr *udph)
644{
645	in_port_t	sport, dport;
646	uint16_t	hlen;
647
648	mdb_printf("%<b>UDP header%</b>\n");
649
650	mdb_nhconvert(&sport, &udph->uh_sport, sizeof (sport));
651	mdb_nhconvert(&dport, &udph->uh_dport, sizeof (dport));
652	mdb_nhconvert(&hlen, &udph->uh_ulen, sizeof (hlen));
653
654	mdb_printf("%<u>%14s %14s %5s %6s%</u>\n",
655	    "SPORT", "DPORT", "LEN", "CSUM");
656	mdb_printf("%5hu (0x%04x) %5hu (0x%04x) %5hu 0x%04hx\n\n", sport, sport,
657	    dport, dport, hlen, udph->uh_sum);
658}
659
660/* ARGSUSED */
661static int
662udphdr(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *av)
663{
664	struct udphdr	udph;
665
666	if (!(flags & DCMD_ADDRSPEC))
667		return (DCMD_USAGE);
668
669	if (mdb_vread(&udph, sizeof (udph), addr) == -1) {
670		mdb_warn("failed to read UDP header at %p", addr);
671		return (DCMD_ERR);
672	}
673	udphdr_print(&udph);
674	return (DCMD_OK);
675}
676
677static void
678sctphdr_print(sctp_hdr_t *sctph)
679{
680	in_port_t sport, dport;
681
682	mdb_printf("%<b>SCTP header%</b>\n");
683	mdb_nhconvert(&sport, &sctph->sh_sport, sizeof (sport));
684	mdb_nhconvert(&dport, &sctph->sh_dport, sizeof (dport));
685
686	mdb_printf("%<u>%14s %14s %10s %10s%</u>\n",
687	    "SPORT", "DPORT", "VTAG", "CHKSUM");
688	mdb_printf("%5hu (0x%04x) %5hu (0x%04x) %10u 0x%08x\n\n", sport, sport,
689	    dport, dport, sctph->sh_verf, sctph->sh_chksum);
690}
691
692/* ARGSUSED */
693static int
694sctphdr(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *av)
695{
696	sctp_hdr_t sctph;
697
698	if (!(flags & DCMD_ADDRSPEC))
699		return (DCMD_USAGE);
700
701	if (mdb_vread(&sctph, sizeof (sctph), addr) == -1) {
702		mdb_warn("failed to read SCTP header at %p", addr);
703		return (DCMD_ERR);
704	}
705
706	sctphdr_print(&sctph);
707	return (DCMD_OK);
708}
709
710static int
711transport_hdr(int proto, uintptr_t addr)
712{
713	mdb_printf("\n");
714	switch (proto) {
715	case IPPROTO_TCP: {
716		struct tcphdr tcph;
717
718		if (mdb_vread(&tcph, sizeof (tcph), addr) == -1) {
719			mdb_warn("failed to read TCP header at %p", addr);
720			return (DCMD_ERR);
721		}
722		tcphdr_print(&tcph);
723		break;
724	}
725	case IPPROTO_UDP:  {
726		struct udphdr udph;
727
728		if (mdb_vread(&udph, sizeof (udph), addr) == -1) {
729			mdb_warn("failed to read UDP header at %p", addr);
730			return (DCMD_ERR);
731		}
732		udphdr_print(&udph);
733		break;
734	}
735	case IPPROTO_SCTP: {
736		sctp_hdr_t sctph;
737
738		if (mdb_vread(&sctph, sizeof (sctph), addr) == -1) {
739			mdb_warn("failed to read SCTP header at %p", addr);
740			return (DCMD_ERR);
741		}
742		sctphdr_print(&sctph);
743		break;
744	}
745	default:
746		break;
747	}
748
749	return (DCMD_OK);
750}
751
752static const mdb_bitmask_t ip_flags[] = {
753	{ "DF",	IPH_DF, IPH_DF	},
754	{ "MF", IPH_MF,	IPH_MF	},
755	{ NULL, 0,	0	}
756};
757
758/* ARGSUSED */
759static int
760iphdr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
761{
762	uint_t		verbose = FALSE, force = FALSE;
763	ipha_t		iph[1];
764	uint16_t	ver, totlen, hdrlen, ipid, off, csum;
765	uintptr_t	nxt_proto;
766	char		exp_csum[8];
767
768	if (mdb_getopts(argc, argv,
769	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
770	    'f', MDB_OPT_SETBITS, TRUE, &force, NULL) != argc)
771		return (DCMD_USAGE);
772
773	if (mdb_vread(iph, sizeof (*iph), addr) == -1) {
774		mdb_warn("failed to read IPv4 header at %p", addr);
775		return (DCMD_ERR);
776	}
777
778	ver = (iph->ipha_version_and_hdr_length & 0xf0) >> 4;
779	if (ver != IPV4_VERSION) {
780		if (ver == IPV6_VERSION) {
781			return (ip6hdr(addr, flags, argc, argv));
782		} else if (!force) {
783			mdb_warn("unknown IP version: %d\n", ver);
784			return (DCMD_ERR);
785		}
786	}
787
788	mdb_printf("%<b>IPv4 header%</b>\n");
789	mdb_printf("%-34s %-34s\n"
790	    "%<u>%-4s %-4s %-5s %-5s %-6s %-5s %-5s %-6s %-8s %-6s%</u>\n",
791	    "SRC", "DST",
792	    "HLEN", "TOS", "LEN", "ID", "OFFSET", "TTL", "PROTO", "CHKSUM",
793	    "EXP-CSUM", "FLGS");
794
795	hdrlen = (iph->ipha_version_and_hdr_length & 0x0f) << 2;
796	mdb_nhconvert(&totlen, &iph->ipha_length, sizeof (totlen));
797	mdb_nhconvert(&ipid, &iph->ipha_ident, sizeof (ipid));
798	mdb_nhconvert(&off, &iph->ipha_fragment_offset_and_flags, sizeof (off));
799	if (hdrlen == IP_SIMPLE_HDR_LENGTH) {
800		if ((csum = ipcksum(iph, sizeof (*iph))) != 0)
801			csum = ~(~csum + ~iph->ipha_hdr_checksum);
802		else
803			csum = iph->ipha_hdr_checksum;
804		mdb_snprintf(exp_csum, 8, "%u", csum);
805	} else {
806		mdb_snprintf(exp_csum, 8, "<n/a>");
807	}
808
809	mdb_printf("%-34I %-34I%\n"
810	    "%-4d %-4d %-5hu %-5hu %-6hu %-5hu %-5hu %-6u %-8s <%5hb>\n",
811	    iph->ipha_src, iph->ipha_dst,
812	    hdrlen, iph->ipha_type_of_service, totlen, ipid,
813	    (off << 3) & 0xffff, iph->ipha_ttl, iph->ipha_protocol,
814	    iph->ipha_hdr_checksum, exp_csum, off, ip_flags);
815
816	if (verbose) {
817		nxt_proto = addr + hdrlen;
818		return (transport_hdr(iph->ipha_protocol, nxt_proto));
819	} else {
820		return (DCMD_OK);
821	}
822}
823
824/* ARGSUSED */
825static int
826ip6hdr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
827{
828	uint_t		verbose = FALSE, force = FALSE;
829	ip6_t		iph[1];
830	int		ver, class, flow;
831	uint16_t	plen;
832	uintptr_t	nxt_proto;
833
834	if (mdb_getopts(argc, argv,
835	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
836	    'f', MDB_OPT_SETBITS, TRUE, &force, NULL) != argc)
837		return (DCMD_USAGE);
838
839	if (mdb_vread(iph, sizeof (*iph), addr) == -1) {
840		mdb_warn("failed to read IPv6 header at %p", addr);
841		return (DCMD_ERR);
842	}
843
844	ver = (iph->ip6_vfc & 0xf0) >> 4;
845	if (ver != IPV6_VERSION) {
846		if (ver == IPV4_VERSION) {
847			return (iphdr(addr, flags, argc, argv));
848		} else if (!force) {
849			mdb_warn("unknown IP version: %d\n", ver);
850			return (DCMD_ERR);
851		}
852	}
853
854	mdb_printf("%<b>IPv6 header%</b>\n");
855	mdb_printf("%<u>%-26s %-26s %4s %7s %5s %3s %3s%</u>\n",
856	    "SRC", "DST", "TCLS", "FLOW-ID", "PLEN", "NXT", "HOP");
857
858	class = (iph->ip6_vcf & IPV6_FLOWINFO_TCLASS) >> 20;
859	mdb_nhconvert(&class, &class, sizeof (class));
860	flow = iph->ip6_vcf & IPV6_FLOWINFO_FLOWLABEL;
861	mdb_nhconvert(&flow, &flow, sizeof (flow));
862	mdb_nhconvert(&plen, &iph->ip6_plen, sizeof (plen));
863
864	mdb_printf("%-26N %-26N %4d %7d %5hu %3d %3d\n",
865	    &iph->ip6_src, &iph->ip6_dst,
866	    class, flow, plen, iph->ip6_nxt, iph->ip6_hlim);
867
868	if (verbose) {
869		nxt_proto = addr + sizeof (ip6_t);
870		return (transport_hdr(iph->ip6_nxt, nxt_proto));
871	} else {
872		return (DCMD_OK);
873	}
874}
875
876int
877ire(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
878{
879	uint_t verbose = FALSE;
880	ire_t ire;
881
882	if (mdb_getopts(argc, argv,
883	    'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL) != argc)
884		return (DCMD_USAGE);
885
886	if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
887
888		if (verbose) {
889			mdb_printf("%?s %40s %-20s%\n"
890			    "%?s %40s %-20s%\n"
891			    "%<u>%?s %40s %4s %-20s%</u>\n",
892			    "ADDR", "SRC", "TYPE",
893			    "", "DST", "MARKS",
894			    "", "STACK", "ZONE", "FLAGS");
895		} else {
896			mdb_printf("%<u>%?s %30s %30s %5s %4s%</u>\n",
897			    "ADDR", "SRC", "DST", "STACK", "ZONE");
898		}
899	}
900
901	if (flags & DCMD_ADDRSPEC) {
902		(void) mdb_vread(&ire, sizeof (ire_t), addr);
903		(void) ire_format(addr, &ire, &verbose);
904	} else if (mdb_walk("ire", (mdb_walk_cb_t)ire_format, &verbose) == -1) {
905		mdb_warn("failed to walk ire table");
906		return (DCMD_ERR);
907	}
908
909	return (DCMD_OK);
910}
911
912static size_t
913mi_osize(const queue_t *q)
914{
915	/*
916	 * The code in common/inet/mi.c allocates an extra word to store the
917	 * size of the allocation.  An mi_o_s is thus a size_t plus an mi_o_s.
918	 */
919	struct mi_block {
920		size_t mi_nbytes;
921		struct mi_o_s mi_o;
922	} m;
923
924	if (mdb_vread(&m, sizeof (m), (uintptr_t)q->q_ptr -
925	    sizeof (m)) == sizeof (m))
926		return (m.mi_nbytes - sizeof (m));
927
928	return (0);
929}
930
931static void
932ip_ill_qinfo(const queue_t *q, char *buf, size_t nbytes)
933{
934	char name[32];
935	ill_t ill;
936
937	if (mdb_vread(&ill, sizeof (ill),
938	    (uintptr_t)q->q_ptr) == sizeof (ill) &&
939	    mdb_readstr(name, sizeof (name), (uintptr_t)ill.ill_name) > 0)
940		(void) mdb_snprintf(buf, nbytes, "if: %s", name);
941}
942
943void
944ip_qinfo(const queue_t *q, char *buf, size_t nbytes)
945{
946	size_t size = mi_osize(q);
947
948	if (size == sizeof (ill_t))
949		ip_ill_qinfo(q, buf, nbytes);
950}
951
952uintptr_t
953ip_rnext(const queue_t *q)
954{
955	size_t size = mi_osize(q);
956	ill_t ill;
957
958	if (size == sizeof (ill_t) && mdb_vread(&ill, sizeof (ill),
959	    (uintptr_t)q->q_ptr) == sizeof (ill))
960		return ((uintptr_t)ill.ill_rq);
961
962	return (NULL);
963}
964
965uintptr_t
966ip_wnext(const queue_t *q)
967{
968	size_t size = mi_osize(q);
969	ill_t ill;
970
971	if (size == sizeof (ill_t) && mdb_vread(&ill, sizeof (ill),
972	    (uintptr_t)q->q_ptr) == sizeof (ill))
973		return ((uintptr_t)ill.ill_wq);
974
975	return (NULL);
976}
977
978/*
979 * Print the core fields in an squeue_t.  With the "-v" argument,
980 * provide more verbose output.
981 */
982static int
983squeue(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
984{
985	unsigned int	i;
986	unsigned int	verbose = FALSE;
987	const int	SQUEUE_STATEDELT = (int)(sizeof (uintptr_t) + 9);
988	boolean_t	arm;
989	squeue_t	squeue;
990
991	if (!(flags & DCMD_ADDRSPEC)) {
992		if (mdb_walk_dcmd("genunix`squeue_cache", "ip`squeue",
993		    argc, argv) == -1) {
994			mdb_warn("failed to walk squeue cache");
995			return (DCMD_ERR);
996		}
997		return (DCMD_OK);
998	}
999
1000	if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL)
1001	    != argc)
1002		return (DCMD_USAGE);
1003
1004	if (!DCMD_HDRSPEC(flags) && verbose)
1005		mdb_printf("\n\n");
1006
1007	if (DCMD_HDRSPEC(flags) || verbose) {
1008		mdb_printf("%?s %-5s %-3s %?s %?s %?s\n",
1009		    "ADDR", "STATE", "CPU",
1010		    "FIRST", "LAST", "WORKER");
1011	}
1012
1013	if (mdb_vread(&squeue, sizeof (squeue_t), addr) == -1) {
1014		mdb_warn("cannot read squeue_t at %p", addr);
1015		return (DCMD_ERR);
1016	}
1017
1018	mdb_printf("%0?p %05x %3d %0?p %0?p %0?p\n",
1019	    addr, squeue.sq_state, squeue.sq_bind,
1020	    squeue.sq_first, squeue.sq_last, squeue.sq_worker);
1021
1022	if (!verbose)
1023		return (DCMD_OK);
1024
1025	arm = B_TRUE;
1026	for (i = 0; squeue_states[i].bit_name != NULL; i++) {
1027		if (((squeue.sq_state) & (1 << i)) == 0)
1028			continue;
1029
1030		if (arm) {
1031			mdb_printf("%*s|\n", SQUEUE_STATEDELT, "");
1032			mdb_printf("%*s+-->  ", SQUEUE_STATEDELT, "");
1033			arm = B_FALSE;
1034		} else
1035			mdb_printf("%*s      ", SQUEUE_STATEDELT, "");
1036
1037		mdb_printf("%-12s %s\n", squeue_states[i].bit_name,
1038		    squeue_states[i].bit_descr);
1039	}
1040
1041	return (DCMD_OK);
1042}
1043
/*
 * Help text for the ::squeue dcmd.
 */
static void
ip_squeue_help(void)
{
	mdb_printf("Print the core information for a given NCA squeue_t.\n\n"
	    "Options:\n"
	    "\t-v\tbe verbose (more descriptive)\n");
}
1051
1052static const mdb_dcmd_t dcmds[] = {
1053	{ "illif", "?[-P v4 | v6]",
1054	    "display or filter IP Lower Level InterFace structures", illif,
1055	    illif_help },
1056	{ "iphdr", ":[-vf]", "display an IPv4 header", iphdr },
1057	{ "ip6hdr", ":[-vf]", "display an IPv6 header", ip6hdr },
1058	{ "ire", "?[-v]", "display Internet Route Entry structures", ire },
1059	{ "squeue", ":[-v]", "print core squeue_t info", squeue,
1060	    ip_squeue_help },
1061	{ "tcphdr", ":", "display a TCP header", tcphdr },
1062	{ "udphdr", ":", "display an UDP header", udphdr },
1063	{ "sctphdr", ":", "display an SCTP header", sctphdr },
1064	{ NULL }
1065};
1066
1067static const mdb_walker_t walkers[] = {
1068	{ "illif", "walk list of ill interface types for all stacks",
1069		illif_walk_init, illif_walk_step, NULL },
1070	{ "illif_stack", "walk list of ill interface types",
1071		illif_stack_walk_init, illif_stack_walk_step,
1072		illif_stack_walk_fini },
1073	{ "ire", "walk active ire_t structures",
1074		ire_walk_init, ire_walk_step, NULL },
1075	{ "ire_ctable", "walk ire_t structures in the ctable",
1076		ire_ctable_walk_init, ire_ctable_walk_step, NULL },
1077	{ "ire_next", "walk ire_t structures in the ctable",
1078		ire_next_walk_init, ire_next_walk_step, NULL },
1079	{ "ip_stacks", "walk all the ip_stack_t",
1080		ip_stacks_walk_init, ip_stacks_walk_step, NULL },
1081	{ NULL }
1082};
1083
1084static const mdb_qops_t ip_qops = { ip_qinfo, ip_rnext, ip_wnext };
1085static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1086
1087const mdb_modinfo_t *
1088_mdb_init(void)
1089{
1090	GElf_Sym sym;
1091
1092	if (mdb_lookup_by_obj("ip", "ipwinit", &sym) == 0)
1093		mdb_qops_install(&ip_qops, (uintptr_t)sym.st_value);
1094
1095	return (&modinfo);
1096}
1097
1098void
1099_mdb_fini(void)
1100{
1101	GElf_Sym sym;
1102
1103	if (mdb_lookup_by_obj("ip", "ipwinit", &sym) == 0)
1104		mdb_qops_remove(&ip_qops, (uintptr_t)sym.st_value);
1105}
1106