1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 *
25 * ipv4.c, Code implementing the IPv4 internet protocol.
26 */
27
28#pragma ident	"%Z%%M%	%I%	%E% SMI"
29
30#include <sys/types.h>
31#include <socket_impl.h>
32#include <socket_inet.h>
33#include <sys/sysmacros.h>
34#include <sys/socket.h>
35#include <netinet/in_systm.h>
36#include <netinet/in.h>
37#include <netinet/ip.h>
38#include <netinet/udp.h>
39#include <net/if_arp.h>
40#include <sys/promif.h>
41#include <sys/bootconf.h>
42#include <sys/fcntl.h>
43#include <sys/salib.h>
44
45#include "icmp4.h"
46#include "ipv4.h"
47#include "ipv4_impl.h"
48#include "mac.h"
49#include "mac_impl.h"
50#include "v4_sum_impl.h"
51#include <sys/bootdebug.h>
52
/*
 * File-scope state shared by the routines below.
 */
static struct ip_frag	fragment[FRAG_MAX];	/* ip fragment buffers */
static int		fragments;		/* Number of occupied slots */
static uint8_t		ttl = MAXTTL;		/* IP ttl for outgoing pkts */
static struct in_addr	myip;			/* our network-order IP addr */
static struct in_addr	mynet;			/* net-order netaddr */
static struct in_addr	netmask =
	{ 0xff, 0xff, 0xff, 0xff };		/* our network-order netmask */
static boolean_t	netmask_set = B_FALSE;	/* has anyone set netmask? */
static struct in_addr	defaultrouter;		/* net-order defaultrouter */
static int		promiscuous;		/* promiscuous mode */
/* Routing table; entries are marked RT_UNUSED on first ipv4_route() call. */
static struct routing table[IPV4_ROUTE_TABLE_SIZE];

/* IP id counter; pre-incremented for every datagram we send. */
static uint16_t	g_ip_id;
66
67#ifdef	DEBUG
68#define	FRAG_DEBUG
69#endif	/* DEBUG */
70
#ifdef FRAG_DEBUG
/*
 * Debugging aid: print one row for every occupied slot of the fragment
 * table, then print the summed payload length (IP length minus IP header
 * length of each fragment) next to the caller-supplied expected size.
 */
static void
frag_disp(uint16_t size)
{
	struct ip_frag	*fp;
	uint_t		payload_sum = 0;
	int		slot;

	printf("Dumping fragment info: (%d)\n\n", fragments);
	printf("More:\tOffset:\tDatap:\t\tIPid:\t\tIPlen:\tIPhlen:\n");

	for (slot = 0; slot < FRAG_MAX; slot++) {
		fp = &fragment[slot];
		if (fp->mp != NULL) {
			printf("%d\t%d\t0x%x\t%d\t\t%d\t%d\n", fp->more,
			    fp->offset, fp->mp->b_rptr,
			    fp->ipid, fp->iplen, fp->iphlen);
			payload_sum += (fp->iplen - fp->iphlen);
		}
	}

	printf("Total length is: %d. It should be: %d\n\n", payload_sum, size);
}
#endif /* FRAG_DEBUG */
94
95/*
96 * This function returns index of fragment 0 of the current fragmented DGRAM
97 * (which would contain the transport header). Return the fragment number
98 * for success, -1 if we don't yet have the first fragment.
99 */
100static int
101frag_first(void)
102{
103	int		i;
104
105	if (fragments == 0)
106		return (-1);
107
108	for (i = 0; i < FRAG_MAX; i++) {
109		if (fragment[i].mp != NULL && fragment[i].offset == 0)
110			return (i);
111	}
112	return (-1);
113}
114
115/*
116 * This function returns index of the last fragment of the current DGRAM.
117 * Returns the fragment number for success, -1 if we don't yet have the
118 * last fragment.
119 */
120static int
121frag_last(void)
122{
123	int		i;
124
125	if (fragments == 0)
126		return (-1);
127
128	for (i = 0; i < FRAG_MAX; i++) {
129		if (fragment[i].mp != NULL && !fragment[i].more)
130			return (i);
131	}
132	return (-1);
133}
134
135/*
136 * This function adds a fragment to the current pkt fragment list. Returns
137 * FRAG_NOSLOTS if there are no more slots, FRAG_DUP if the fragment is
138 * a duplicate, or FRAG_SUCCESS if it is successful.
139 */
140static int
141frag_add(int16_t offset, mblk_t *mp, uint16_t ipid,
142    int16_t iplen, int16_t iphlen, uint8_t ipp)
143{
144	int	i;
145	int16_t	true_offset = IPV4_OFFSET(offset);
146
147	/* first pass - look for duplicates */
148	for (i = 0; i < FRAG_MAX; i++) {
149		if (fragment[i].mp != NULL &&
150		    fragment[i].offset == true_offset)
151			return (FRAG_DUP);
152	}
153
154	/* second pass - fill in empty slot */
155	for (i = 0; i < FRAG_MAX; i++) {
156		if (fragment[i].mp == NULL) {
157			fragment[i].more = (offset & IP_MF);
158			fragment[i].offset = true_offset;
159			fragment[i].mp = mp;
160			fragment[i].ipid = ipid;
161			fragment[i].iplen = iplen;
162			fragment[i].iphlen = iphlen;
163			fragment[i].ipp = ipp;
164			fragments++;
165			return (FRAG_SUCCESS);
166		}
167	}
168	return (FRAG_NOSLOTS);
169}
170
171/*
172 * Nuke a fragment.
173 */
174static void
175frag_free(int index)
176{
177	if (fragment[index].mp != NULL) {
178		freeb(fragment[index].mp);
179		fragments--;
180	}
181	bzero((caddr_t)&fragment[index], sizeof (struct ip_frag));
182}
183
184/*
185 * zero the frag list.
186 */
187static void
188frag_flush(void)
189{
190	int i;
191
192	for (i = 0; i < FRAG_MAX; i++)
193		frag_free(i);
194
195	fragments = 0;
196}
197
198/*
199 * Analyze the fragment list - see if we captured all our fragments.
200 *
201 * Returns TRUE if we've got all the fragments, and FALSE if we don't.
202 */
203static int
204frag_chk(void)
205{
206	int		i, first_frag, last_frag;
207	int16_t		actual, total;
208	uint16_t	ip_id;
209	uint8_t		ipp;
210
211	if (fragments == 0 || (first_frag = frag_first()) < 0 ||
212	    (last_frag = frag_last()) < 0)
213		return (FALSE);
214
215	/*
216	 * Validate the ipid's of our fragments - nuke those that don't
217	 * match the id of the first fragment or don't match the IP
218	 * protocol of the first fragment.
219	 */
220	ip_id = fragment[first_frag].ipid;
221	ipp = fragment[first_frag].ipp;
222	for (i = 0; i < FRAG_MAX; i++) {
223		if (fragment[i].mp != NULL && ip_id != fragment[i].ipid &&
224			fragment[i].ipp != ipp) {
225#ifdef FRAG_DEBUG
226			printf("ipv4: Frag id mismatch: %x != %x\n",
227			    fragment[i].ipid, ip_id);
228#endif /* FRAG_DEBUG */
229			frag_free(i);
230		}
231	}
232
233	if (frag_last() < 0)
234		return (FALSE);
235
236	total = fragment[last_frag].offset + fragment[last_frag].iplen -
237	    fragment[last_frag].iphlen;
238
239	for (i = 0, actual = 0; i < FRAG_MAX; i++)
240		actual += (fragment[i].iplen - fragment[i].iphlen);
241
242#ifdef FRAG_DEBUG
243	frag_disp(total);
244#endif /* FRAG_DEBUG */
245
246	return (total == actual);
247}
248
249/*
250 * Load the assembled fragments into igp. Returns 0 for success, nonzero
251 * otherwise.
252 */
253static int
254frag_load(struct inetgram *igp)
255{
256	int	i;
257	int16_t	len;
258	uint_t	total_len;
259	boolean_t first_frag = B_FALSE;
260	mblk_t *mp;
261	struct ip *iph;
262	int first_iph_len;
263
264	if (fragments == 0)
265		return (ENOENT);
266
267	mp = igp->igm_mp;
268	/* Get the IP header length of the first fragment. */
269	i = frag_first();
270	assert(i >= 0);
271	first_iph_len = fragment[i].iphlen;
272	for (i = 0, len = 0, total_len = 0; i < FRAG_MAX; i++) {
273		if (fragment[i].mp != NULL) {
274			/*
275			 * Copy just the data (omit the ip header of all
276			 * fragments except the first one which contains
277			 * all the info...)
278			 */
279			if (fragment[i].offset == 0) {
280				len = fragment[i].iplen;
281				first_frag = B_TRUE;
282			} else {
283				len = fragment[i].iplen - fragment[i].iphlen;
284			}
285			total_len += len;
286			if (total_len > mp->b_size)
287				return (E2BIG);
288			if (first_frag) {
289				bcopy((caddr_t)(fragment[i].mp->b_rptr),
290				    (caddr_t)mp->b_rptr, len);
291				first_frag = B_FALSE;
292			} else {
293				bcopy((caddr_t)(fragment[i].mp->b_rptr +
294				    fragment[i].iphlen),
295				    (caddr_t)(mp->b_rptr + first_iph_len +
296				    fragment[i].offset), len);
297			}
298			mp->b_wptr += len;
299		}
300	}
301	/* Fix the total length in the IP header. */
302	iph = (struct ip *)mp->b_rptr;
303	iph->ip_len = htons(total_len);
304	return (0);
305}
306
307/*
308 * Locate a routing table entry based upon arguments. IP addresses expected
309 * in network order. Returns index for success, -1 if entry not found.
310 */
311static int
312find_route(uint8_t *flagp, struct in_addr *destp, struct in_addr *gatewayp)
313{
314	int i, table_entry = -1;
315
316	for (i = 0; table_entry == -1 && i < IPV4_ROUTE_TABLE_SIZE; i++) {
317		if (flagp != NULL) {
318			if (*flagp & table[i].flag)
319				table_entry = i;
320		}
321		if (destp != NULL) {
322			if (destp->s_addr == table[i].dest.s_addr)
323				table_entry = i;
324			else
325				table_entry = -1;
326		}
327		if (gatewayp != NULL) {
328			if (gatewayp->s_addr == table[i].gateway.s_addr)
329				table_entry = i;
330			else
331				table_entry = -1;
332		}
333	}
334	return (table_entry);
335}
336
337/*
338 * ADD or DEL a routing table entry. Returns 0 for success, -1 and errno
339 * otherwise. IP addresses are expected in network order.
340 */
341int
342ipv4_route(int cmd, uint8_t flag, struct in_addr *destp,
343    struct in_addr *gatewayp)
344{
345	static	int	routing_table_initialized;
346	int		index;
347	uint8_t 	tmp_flag;
348
349	if (gatewayp == NULL) {
350		errno = EINVAL;
351		return (-1);
352	}
353
354	/* initialize routing table */
355	if (routing_table_initialized == 0) {
356		for (index = 0; index < IPV4_ROUTE_TABLE_SIZE; index++)
357			table[index].flag = RT_UNUSED;
358		routing_table_initialized = 1;
359	}
360
361	switch (cmd) {
362	case IPV4_ADD_ROUTE:
363		tmp_flag = (uint8_t)RT_UNUSED;
364		if ((index = find_route(&tmp_flag, NULL, NULL)) == -1) {
365			dprintf("ipv4_route: routing table full.\n");
366			errno = ENOSPC;
367			return (-1);
368		}
369		table[index].flag = flag;
370		if (destp != NULL)
371			table[index].dest.s_addr = destp->s_addr;
372		else
373			table[index].dest.s_addr = htonl(INADDR_ANY);
374		table[index].gateway.s_addr = gatewayp->s_addr;
375		break;
376	case IPV4_BAD_ROUTE:
377		/* FALLTHRU */
378	case IPV4_DEL_ROUTE:
379		if ((index = find_route(&flag, destp, gatewayp)) == -1) {
380			dprintf("ipv4_route: No such routing entry.\n");
381			errno = ENOENT;
382			return (-1);
383		}
384		if (cmd == IPV4_DEL_ROUTE) {
385			table[index].flag = RT_UNUSED;
386			table[index].dest.s_addr = htonl(INADDR_ANY);
387			table[index].gateway.s_addr = htonl(INADDR_ANY);
388		} else
389			table[index].flag = RT_NG;
390	default:
391		errno = EINVAL;
392		return (-1);
393	}
394	return (0);
395}
396
397/*
398 * Return gateway to destination. Returns gateway IP address in network order
399 * for success, NULL if no route to destination exists.
400 */
401struct in_addr *
402ipv4_get_route(uint8_t flag, struct in_addr *destp, struct in_addr *gatewayp)
403{
404	int index;
405	if ((index = find_route(&flag, destp, gatewayp)) == -1)
406		return (NULL);
407	return (&table[index].gateway);
408}
409
410/*
411 * Initialize the IPv4 generic parts of the socket, as well as the routing
412 * table.
413 */
414void
415ipv4_socket_init(struct inetboot_socket *isp)
416{
417	isp->input[NETWORK_LVL] = ipv4_input;
418	isp->output[NETWORK_LVL] = ipv4_output;
419	isp->close[NETWORK_LVL] = NULL;
420	isp->headerlen[NETWORK_LVL] = ipv4_header_len;
421}
422
423/*
424 * Initialize a raw ipv4 socket.
425 */
426void
427ipv4_raw_socket(struct inetboot_socket *isp, uint8_t proto)
428{
429	isp->type = INETBOOT_RAW;
430	if (proto == 0)
431		isp->proto = IPPROTO_IP;
432	else
433		isp->proto = proto;
434	isp->input[TRANSPORT_LVL] = NULL;
435	isp->output[TRANSPORT_LVL] = NULL;
436	isp->headerlen[TRANSPORT_LVL] = NULL;
437	isp->ports = NULL;
438}
439
440/*
441 * Return the size of an IPv4 header (no options)
442 */
443/* ARGSUSED */
444int
445ipv4_header_len(struct inetgram *igm)
446{
447	return (sizeof (struct ip));
448}
449
450/*
451 * Set our source address.
452 * Argument is assumed to be host order.
453 */
454void
455ipv4_setipaddr(struct in_addr *ip)
456{
457	myip.s_addr = htonl(ip->s_addr);
458}
459
460/*
461 * Returns our current source address in host order.
462 */
463void
464ipv4_getipaddr(struct in_addr *ip)
465{
466	ip->s_addr = ntohl(myip.s_addr);
467}
468
469/*
470 * Set our netmask.
471 * Argument is assumed to be host order.
472 */
473void
474ipv4_setnetmask(struct in_addr *ip)
475{
476	netmask_set = B_TRUE;
477	netmask.s_addr = htonl(ip->s_addr);
478	mynet.s_addr = netmask.s_addr & myip.s_addr; /* implicit */
479}
480
481void
482ipv4_getnetid(struct in_addr *my_netid)
483{
484	struct in_addr my_netmask;
485	if (mynet.s_addr != 0)
486		my_netid->s_addr = ntohl(mynet.s_addr);
487	else {
488		ipv4_getnetmask(&my_netmask);
489		my_netid->s_addr = my_netmask.s_addr & ntohl(myip.s_addr);
490	}
491}
492
493/*
494 * Returns our current netmask in host order.
495 * Neither OBP nor the standalone DHCP client mandate
496 * that the netmask be specified, so in the absence of
497 * a netmask, we attempt to derive it using class-based
498 * heuristics.
499 */
500void
501ipv4_getnetmask(struct in_addr *ip)
502{
503	if (netmask_set || (myip.s_addr == 0))
504		ip->s_addr = ntohl(netmask.s_addr);
505	else {
506		/* base the netmask on our IP address */
507		if (IN_CLASSA(ntohl(myip.s_addr)))
508			ip->s_addr = ntohl(IN_CLASSA_NET);
509		else if (IN_CLASSB(ntohl(myip.s_addr)))
510			ip->s_addr = ntohl(IN_CLASSB_NET);
511		else if (IN_CLASSC(ntohl(myip.s_addr)))
512			ip->s_addr = ntohl(IN_CLASSC_NET);
513		else
514			ip->s_addr = ntohl(IN_CLASSE_NET);
515	}
516}
517
518/*
519 * Set our default router.
520 * Argument is assumed to be host order, and *MUST* be on the same network
521 * as our source IP address.
522 */
523void
524ipv4_setdefaultrouter(struct in_addr *ip)
525{
526	defaultrouter.s_addr = htonl(ip->s_addr);
527}
528
529/*
530 * Returns our current default router in host order.
531 */
532void
533ipv4_getdefaultrouter(struct in_addr *ip)
534{
535	ip->s_addr = ntohl(defaultrouter.s_addr);
536}
537
538/*
539 * Toggle promiscuous flag. If set, client disregards destination IP
540 * address. Otherwise, only limited broadcast, network broadcast, and
541 * unicast traffic get through. Returns previous setting.
542 */
543int
544ipv4_setpromiscuous(int toggle)
545{
546	int old = promiscuous;
547
548	promiscuous = toggle;
549
550	return (old);
551}
552
553/*
554 * Set IP TTL.
555 */
556void
557ipv4_setmaxttl(uint8_t cttl)
558{
559	ttl = cttl;
560}
561
/*
 * Format an IPv4 address as a dotted-decimal string, taking the four
 * bytes of the address in memory order.
 * Returns a pointer to a static buffer that is overwritten by each call.
 */
char *
inet_ntoa(struct in_addr ip)
{
	static char	dotted[16];
	uint8_t		*octet = (uint8_t *)&ip.s_addr;

	(void) sprintf(dotted, "%u.%u.%u.%u",
	    octet[0], octet[1], octet[2], octet[3]);
	return (dotted);
}
576
577/*
578 * Construct a transport datagram from a series of IP fragments (igp == NULL)
579 * or from a single IP datagram (igp != NULL). Return the address of the
580 * contructed transport datagram.
581 */
582struct inetgram *
583make_trans_datagram(int index, struct inetgram *igp, struct in_addr ipsrc,
584    struct in_addr ipdst, uint16_t iphlen)
585{
586	uint16_t	trans_len, *transp, new_len;
587	int		first_frag, last_frag;
588	boolean_t	fragmented;
589	struct inetgram	*ngp;
590	struct ip	*iph;
591
592	fragmented = (igp == NULL);
593
594	ngp = (struct inetgram *)bkmem_zalloc(sizeof (struct inetgram));
595	if (ngp == NULL) {
596		errno = ENOMEM;
597		if (fragmented)
598			frag_flush();
599		return (NULL);
600	}
601
602	if (fragmented) {
603		last_frag = frag_last();
604		trans_len = fragment[last_frag].offset +
605		    fragment[last_frag].iplen - fragment[last_frag].iphlen;
606		first_frag = frag_first();
607		/*
608		 * The returned buffer contains the IP header of the
609		 * first fragment.
610		 */
611		trans_len += fragment[first_frag].iphlen;
612		transp = (uint16_t *)(fragment[first_frag].mp->b_rptr +
613		    fragment[first_frag].iphlen);
614	} else {
615		/*
616		 * Note that igm_len may not be the real length of an
617		 * IP packet because some network interface, such as
618		 * Ethernet, as a minimum frame size.  So we should not
619		 * use the interface frame size to determine the
620		 * length of an IP packet.  We should use the IP
621		 * length field in the IP header.
622		 */
623		iph = (struct ip *)igp->igm_mp->b_rptr;
624		trans_len = ntohs(iph->ip_len);
625		transp = (uint16_t *)(igp->igm_mp->b_rptr + iphlen);
626	}
627
628	ngp->igm_saddr.sin_addr.s_addr = ipsrc.s_addr;
629	ngp->igm_saddr.sin_port = sockets[index].ports(transp, SOURCE);
630	ngp->igm_target.s_addr = ipdst.s_addr;
631	ngp->igm_level = TRANSPORT_LVL;
632
633	/*
634	 * Align to 16bit value.  Checksum code may require an extra byte
635	 * for padding.
636	 */
637	new_len = ((trans_len + sizeof (int16_t) - 1) &
638	    ~(sizeof (int16_t) - 1));
639	if ((ngp->igm_mp = allocb(new_len, 0)) == NULL) {
640		errno = ENOMEM;
641		bkmem_free((caddr_t)ngp, sizeof (struct inetgram));
642		if (fragmented)
643			frag_flush();
644		return (NULL);
645	}
646
647	if (fragmented) {
648		if (frag_load(ngp) != 0) {
649			freeb(ngp->igm_mp);
650			bkmem_free((caddr_t)ngp, sizeof (struct inetgram));
651			frag_flush();
652			return (NULL);
653		}
654		frag_flush();
655	} else {
656		bcopy((caddr_t)(igp->igm_mp->b_rptr),
657		    (caddr_t)ngp->igm_mp->b_rptr, trans_len);
658		ngp->igm_mp->b_wptr += trans_len;
659	}
660	return (ngp);
661}
662
663/*
664 * ipv4_input: Pull in IPv4 datagrams addressed to us. Handle IP fragmentation
665 * (fragments received in any order) and ICMP at this level.
666 *
667 * Note that because our network is serviced by polling when we expect
668 * something (upon a referenced socket), we don't go through the work of
669 * locating the appropriate socket a datagram is destined for. We'll only
670 * accept data for the referenced socket. This means we don't have
671 * asynchronous networking, but since we can't service the net using an
672 * interrupt handler, it doesn't do us any good to try to service datagrams
673 * destined for sockets other than the referenced one. Data is handled in
674 * a fifo manner.
675 *
676 * The mac layer will grab all frames for us. If we find we don't have all
677 * the necessary fragments to reassemble the datagram, we'll call the mac
678 * layer again for FRAG_ATTEMPTS to see if it has any more frames.
679 *
680 * Supported protocols: IPPROTO_IP, IPPROTO_ICMP, IPPROTO_UDP.
681 *
682 * Returns: number of NETWORK_LVL datagrams placed on socket , -1 if error
683 * occurred.
684 *
685 * Note: errno is set to ETIMEDOUT if fragment reassembly fails.
686 */
687int
688ipv4_input(int index)
689{
690	int			datagrams = 0;
691	int			frag_stat, input_attempts = 0;
692	uint16_t		iphlen, iplen, ip_id;
693	int16_t			curr_off;
694	struct ip		*iphp;
695	struct inetgram		*igp, *newgp = NULL, *ipv4_listp = NULL;
696	struct in_addr		ipdst, ipsrc;
697	mblk_t			*mp;
698	enum SockType		type;
699
700#ifdef	DEBUG
701	printf("ipv4_input(%d): start ######################################\n",
702	    index);
703#endif	/* DEBUG */
704
705	frag_flush();
706
707ipv4_try_again:
708
709	while ((igp = sockets[index].inq) != NULL) {
710		if (igp->igm_level != NETWORK_LVL) {
711#ifdef	DEBUG
712			printf("ipv4_input(%d): unexpected frame type: %d\n",
713			    index, igp->igm_level);
714#endif	/* DEBUG */
715			del_gram(&sockets[index].inq, igp, TRUE);
716			continue;
717		}
718		iphp = (struct ip *)igp->igm_mp->b_rptr;
719		if (iphp->ip_v != IPVERSION) {
720			dprintf("ipv4_input(%d): IPv%d datagram discarded\n",
721			index, iphp->ip_v);
722			del_gram(&sockets[index].inq, igp, TRUE);
723			continue;
724		}
725		iphlen = iphp->ip_hl << 2;
726		if (iphlen < sizeof (struct ip)) {
727			dprintf("ipv4_input(%d): IP msg too short (%d < %u)\n",
728			    index, iphlen, (uint_t)sizeof (struct ip));
729			del_gram(&sockets[index].inq, igp, TRUE);
730			continue;
731		}
732		iplen = ntohs(iphp->ip_len);
733		if (iplen > msgdsize(igp->igm_mp)) {
734			dprintf("ipv4_input(%d): IP len/buffer mismatch "
735			    "(%d > %lu)\n", index, iplen, igp->igm_mp->b_size);
736			del_gram(&sockets[index].inq, igp, TRUE);
737			continue;
738		}
739
740		bcopy((caddr_t)&(iphp->ip_dst), (caddr_t)&ipdst,
741		    sizeof (ipdst));
742		bcopy((caddr_t)&(iphp->ip_src), (caddr_t)&ipsrc,
743		    sizeof (ipsrc));
744
745		/* igp->igm_mp->b_datap is guaranteed to be 64 bit aligned] */
746		if (ipv4cksum((uint16_t *)iphp, iphlen) != 0) {
747			dprintf("ipv4_input(%d): Bad IP header checksum "
748			    "(to %s)\n", index, inet_ntoa(ipdst));
749			del_gram(&sockets[index].inq, igp, TRUE);
750			continue;
751		}
752
753		if (!promiscuous) {
754			/* validate destination address */
755			if (ipdst.s_addr != htonl(INADDR_BROADCAST) &&
756			    ipdst.s_addr != (mynet.s_addr | ~netmask.s_addr) &&
757			    ipdst.s_addr != myip.s_addr) {
758#ifdef	DEBUG
759				printf("ipv4_input(%d): msg to %s discarded.\n",
760				    index, inet_ntoa(ipdst));
761#endif	/* DEBUG */
762				/* not ours */
763				del_gram(&sockets[index].inq, igp, TRUE);
764				continue;
765			}
766		}
767
768		/* Intercept ICMP first */
769		if (!promiscuous && (iphp->ip_p == IPPROTO_ICMP)) {
770			icmp4(igp, iphp, iphlen, ipsrc);
771			del_gram(&sockets[index].inq, igp, TRUE);
772			continue;
773		}
774
775#ifdef	DEBUG
776		printf("ipv4_input(%d): processing ID: 0x%x protocol %d "
777		    "(0x%x) (0x%x,%d)\n",
778		    index, ntohs(iphp->ip_id), iphp->ip_p, igp, igp->igm_mp,
779		    igp->igm_mp->b_size);
780#endif	/* DEBUG */
781		type = sockets[index].type;
782		if (type == INETBOOT_RAW) {
783			/* No fragmentation - Just the raw packet. */
784#ifdef	DEBUG
785			printf("ipv4_input(%d): Raw packet.\n", index);
786#endif	/* DEBUG */
787			del_gram(&sockets[index].inq, igp, FALSE);
788			add_grams(&ipv4_listp, igp);
789			igp->igm_mp->b_rptr += iphlen;
790			igp->igm_mp->b_wptr = igp->igm_mp->b_rptr + iplen;
791			datagrams++;
792			continue;
793		}
794
795		if ((type == INETBOOT_DGRAM && iphp->ip_p != IPPROTO_UDP) ||
796		    (type == INETBOOT_STREAM && iphp->ip_p != IPPROTO_TCP)) {
797			/* Wrong protocol. */
798			dprintf("ipv4_input(%d): unexpected protocol: "
799			    "%d for socket type %d\n", index, iphp->ip_p, type);
800			del_gram(&sockets[index].inq, igp, TRUE);
801			continue;
802		}
803
804		/*
805		 * The following code is common to both STREAM and DATAGRAM
806		 * sockets.
807		 */
808
809		/*
810		 * Once we process the first fragment, we won't have
811		 * the transport header, so we'll have to  match on
812		 * IP id.
813		 */
814		curr_off = ntohs(iphp->ip_off);
815		if ((curr_off & ~(IP_DF | IP_MF)) == 0) {
816			uint16_t	*transp;
817
818			/* Validate transport header. */
819			mp = igp->igm_mp;
820			if ((mp->b_wptr - mp->b_rptr - iphlen) <
821			    sockets[index].headerlen[TRANSPORT_LVL](igp)) {
822				dprintf("ipv4_input(%d): datagram 0 "
823				    "too small to hold transport header "
824				    "(from %s)\n", index, inet_ntoa(ipsrc));
825				del_gram(&sockets[index].inq, igp, TRUE);
826				continue;
827			}
828
829			/*
830			 * check alignment - transport elements are 16
831			 * bit aligned..
832			 */
833			transp = (uint16_t *)(mp->b_rptr + iphlen);
834			if ((uintptr_t)transp % sizeof (uint16_t)) {
835				dprintf("ipv4_input(%d): Transport "
836				    "header is not 16-bit aligned "
837				    "(0x%lx, from %s)\n", index, (long)transp,
838				    inet_ntoa(ipsrc));
839				del_gram(&sockets[index].inq, igp, TRUE);
840				continue;
841			}
842
843			if (curr_off & IP_MF) {
844				/* fragment 0 of fragmented datagram */
845				ip_id = ntohs(iphp->ip_id);
846				frag_stat = frag_add(curr_off, igp->igm_mp,
847				    ip_id, iplen, iphlen, iphp->ip_p);
848				if (frag_stat != FRAG_SUCCESS) {
849#ifdef	FRAG_DEBUG
850					if (frag_stat == FRAG_DUP) {
851						printf("ipv4_input"
852						    "(%d): Frag dup.\n", index);
853					} else {
854						printf("ipv4_input"
855						    "(%d): too many "
856						    "frags\n", index);
857					}
858#endif	/* FRAG_DEBUG */
859					del_gram(&sockets[index].inq,
860					    igp, TRUE);
861					continue;
862				}
863
864				del_gram(&sockets[index].inq, igp, FALSE);
865				/* keep the data, lose the inetgram */
866				bkmem_free((caddr_t)igp,
867				    sizeof (struct inetgram));
868#ifdef	FRAG_DEBUG
869				printf("ipv4_input(%d): Frag/Off/Id "
870				    "(%d/%d/%x)\n", index, fragments,
871				    IPV4_OFFSET(curr_off), ip_id);
872#endif	/* FRAG_DEBUG */
873			} else {
874				/* Single, unfragmented datagram */
875				newgp = make_trans_datagram(index, igp,
876				    ipsrc, ipdst, iphlen);
877				if (newgp != NULL) {
878					add_grams(&ipv4_listp, newgp);
879					datagrams++;
880				}
881				del_gram(&sockets[index].inq, igp,
882				    TRUE);
883				continue;
884			}
885		} else {
886			/* fragments other than 0 */
887			frag_stat = frag_add(curr_off, igp->igm_mp,
888			    ntohs(iphp->ip_id), iplen, iphlen, iphp->ip_p);
889
890			if (frag_stat == FRAG_SUCCESS) {
891#ifdef	FRAG_DEBUG
892				printf("ipv4_input(%d): Frag(%d) "
893				    "off(%d) id(%x)\n", index,
894				    fragments, IPV4_OFFSET(curr_off),
895				    ntohs(iphp->ip_id));
896#endif	/* FRAG_DEBUG */
897				del_gram(&sockets[index].inq, igp, FALSE);
898				/* keep the data, lose the inetgram */
899				bkmem_free((caddr_t)igp,
900				    sizeof (struct inetgram));
901			} else {
902#ifdef	FRAG_DEBUG
903				if (frag_stat == FRAG_DUP)
904					printf("ipv4_input(%d): Frag "
905					    "dup.\n", index);
906				else {
907					printf("ipv4_input(%d): too "
908					    "many frags\n", index);
909				}
910#endif	/* FRAG_DEBUG */
911				del_gram(&sockets[index].inq, igp, TRUE);
912				continue;
913			}
914		}
915
916		/*
917		 * Determine if we have all of the fragments.
918		 *
919		 * NOTE: at this point, we've placed the data in the
920		 * fragment table, and the inetgram (igp) has been
921		 * deleted.
922		 */
923		if (!frag_chk())
924			continue;
925
926		newgp = make_trans_datagram(index, NULL, ipsrc, ipdst, iphlen);
927		if (newgp == NULL)
928			continue;
929		add_grams(&ipv4_listp, newgp);
930		datagrams++;
931	}
932	if (ipv4_listp == NULL && fragments != 0) {
933		if (++input_attempts > FRAG_ATTEMPTS) {
934			dprintf("ipv4_input(%d): reassembly(%d) timed out in "
935			    "%d msecs.\n", index, fragments,
936			    sockets[index].in_timeout * input_attempts);
937			frag_flush();
938			errno = ETIMEDOUT;
939			return (-1);
940		} else {
941			/*
942			 * Call the media layer again... there may be more
943			 * packets waiting.
944			 */
945			if (sockets[index].input[MEDIA_LVL](index) < 0) {
946				/* errno will be set appropriately */
947				frag_flush();
948				return (-1);
949			}
950			goto ipv4_try_again;
951		}
952	}
953
954	add_grams(&sockets[index].inq, ipv4_listp);
955
956	return (datagrams);
957}
958
959/*
960 * ipv4_output: Generate IPv4 datagram(s) for the payload and deliver them.
961 * Routing is handled here as well, by reusing the saddr field to hold the
962 * router's IP address.
963 *
964 * We don't deal with fragmentation on the outgoing side.
965 *
966 * Arguments: index to socket, inetgram to send.
967 *
968 * Returns: 0 for success, -1 if error occurred.
969 */
970int
971ipv4_output(int index, struct inetgram *ogp)
972{
973	struct ip	*iphp;
974	uint64_t	iphbuffer[sizeof (struct ip)];
975
976#ifdef	DEBUG
977	printf("ipv4_output(%d): size %d\n", index,
978	    ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr);
979#endif	/* DEBUG */
980
981	/* we don't deal (yet) with fragmentation. Maybe never will */
982	if ((ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr) > mac_get_mtu()) {
983		dprintf("ipv4: datagram too big for MAC layer.\n");
984		errno = E2BIG;
985		return (-1);
986	}
987
988	if (ogp->igm_level != NETWORK_LVL) {
989#ifdef	DEBUG
990		printf("ipv4_output(%d): unexpected frame type: %d\n", index,
991		    ogp->igm_level);
992#endif	/* DEBUG */
993		errno = EINVAL;
994		return (-1);
995	}
996
997	if (sockets[index].out_flags & SO_DONTROUTE)
998		ogp->igm_oflags |= MSG_DONTROUTE;
999
1000	iphp = (struct ip *)&iphbuffer;
1001	iphp->ip_v = IPVERSION;
1002	iphp->ip_hl = sizeof (struct ip) / 4;
1003	iphp->ip_tos = 0;
1004	iphp->ip_len = htons(ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr +
1005	    sizeof (struct ip));
1006	iphp->ip_id = htons(++g_ip_id);
1007	iphp->ip_off = htons(IP_DF);
1008	iphp->ip_p = sockets[index].proto;
1009	iphp->ip_sum = htons(0);
1010	iphp->ip_ttl = ttl;
1011
1012	/* struct copies */
1013	iphp->ip_src = myip;
1014	iphp->ip_dst = ogp->igm_saddr.sin_addr;
1015
1016	/*
1017	 * On local / limited broadcasts, don't route. From a purist's
1018	 * perspective, we should be setting the TTL to 1. But
1019	 * operational experience has shown that some BOOTP relay agents
1020	 * (ciscos) discard our packets. Furthermore, these devices also
1021	 * *don't* reset the TTL to MAXTTL on the unicast side of the
1022	 * BOOTP relay agent! Sigh. Thus to work correctly in these
1023	 * environments, we leave the TTL as it has been been set by
1024	 * the application layer, and simply don't check for a route.
1025	 */
1026	if (iphp->ip_dst.s_addr == htonl(INADDR_BROADCAST) ||
1027	    (netmask.s_addr != htonl(INADDR_BROADCAST) &&
1028	    iphp->ip_dst.s_addr == (mynet.s_addr | ~netmask.s_addr))) {
1029		ogp->igm_oflags |= MSG_DONTROUTE;
1030	}
1031
1032	/* Routing necessary? */
1033	if ((ogp->igm_oflags & MSG_DONTROUTE) == 0 &&
1034	    ((iphp->ip_dst.s_addr & netmask.s_addr) != mynet.s_addr)) {
1035		struct in_addr *rip;
1036		if ((rip = ipv4_get_route(RT_HOST, &iphp->ip_dst,
1037		    NULL)) == NULL) {
1038			rip = ipv4_get_route(RT_DEFAULT, NULL, NULL);
1039		}
1040		if (rip == NULL) {
1041			dprintf("ipv4(%d): No route to %s.\n",
1042			    index, inet_ntoa(iphp->ip_dst));
1043			errno = EHOSTUNREACH;
1044			return (-1);
1045		}
1046		ogp->igm_router.s_addr = rip->s_addr;
1047	} else
1048		ogp->igm_router.s_addr = htonl(INADDR_ANY);
1049
1050	iphp->ip_sum = ipv4cksum((uint16_t *)iphp, sizeof (struct ip));
1051	ogp->igm_mp->b_rptr -= sizeof (struct ip);
1052	bcopy((caddr_t)iphp, (caddr_t)(ogp->igm_mp->b_rptr),
1053	    sizeof (struct ip));
1054
1055	ogp->igm_level = MEDIA_LVL;
1056
1057	return (0);
1058}
1059
1060/*
1061 * Function to be called by TCP to send out a packet.  This is used
1062 * when TCP wants to send out packets which it has already filled in
1063 * most of the header fields.
1064 */
1065int
1066ipv4_tcp_output(int sock_id, mblk_t *pkt)
1067{
1068	struct ip *iph;
1069	struct in_addr *rip = NULL;
1070	struct inetgram datagram;
1071
1072	iph = (struct ip *)pkt->b_rptr;
1073
1074	bzero(&datagram, sizeof (struct inetgram));
1075
1076	/*
1077	 * Bootparams doesn't know about subnet masks, so we need to
1078	 * explicitly check for this flag.
1079	 */
1080	if (sockets[sock_id].out_flags & SO_DONTROUTE)
1081		datagram.igm_oflags |= MSG_DONTROUTE;
1082
1083	/* Routing necessary? */
1084	if (((datagram.igm_oflags & MSG_DONTROUTE) == 0) &&
1085		((iph->ip_dst.s_addr & netmask.s_addr) != mynet.s_addr)) {
1086		if ((rip = ipv4_get_route(RT_HOST, &iph->ip_dst,
1087		    NULL)) == NULL) {
1088			rip = ipv4_get_route(RT_DEFAULT, NULL, NULL);
1089		}
1090		if (rip == NULL) {
1091			dprintf("ipv4(%d): No route to %s.\n",
1092			    sock_id, inet_ntoa(iph->ip_dst));
1093			errno = EHOSTUNREACH;
1094			return (-1);
1095		}
1096	}
1097
1098	iph->ip_id = htons(++g_ip_id);
1099	iph->ip_sum = ipv4cksum((uint16_t *)iph, sizeof (struct ip));
1100#if DEBUG > 1
1101	printf("ipv4_tcp_output: dump IP packet(%d)\n", iph->ip_len);
1102	hexdump((char *)pkt->b_rptr, iph->ip_len);
1103#endif
1104	/* Call the MAC layer output routine to send it out. */
1105	datagram.igm_mp = pkt;
1106	datagram.igm_level = MEDIA_LVL;
1107	if (rip != NULL)
1108		datagram.igm_router.s_addr = rip->s_addr;
1109	else
1110		datagram.igm_router.s_addr = 0;
1111	return (mac_state.mac_output(sock_id, &datagram));
1112}
1113
/*
 * Internet address interpretation routine.
 * All the network library routines call this
 * routine to interpret entries in the data bases
 * which are expected to be an address.
 *
 * Accepted forms, each part written as in C (0x = hex, leading 0 = octal,
 * otherwise decimal):
 *	a.b.c.d	(each part 8 bits)
 *	a.b.c	(with c treated as 16 bits)
 *	a.b	(with b treated as 24 bits)
 *	a	(32 bits)
 * The number may be followed by white space; any other trailing character
 * makes the string invalid.
 *
 * The value returned is in network order, or (uint32_t)-1 if the string is
 * NULL, empty or malformed.
 */
in_addr_t
inet_addr(const char *cp)
{
	uint32_t val, base, n;
	char c;
	uint32_t parts[4], *pp = parts;

	if (cp == NULL || *cp == '\0')
		return ((uint32_t)-1); /* disallow null string in cp */

	for (;;) {
		/*
		 * Collect number up to ``.''.
		 * Values are specified as for C:
		 * 0x=hex, 0=octal, other=decimal.
		 */
		val = 0;
		base = 10;
		if (*cp == '0') {
			if (*++cp == 'x' || *cp == 'X') {
				base = 16;
				cp++;
			} else {
				base = 8;
			}
		}
		/*
		 * The <ctype.h> classifiers are only defined for values
		 * representable as unsigned char, hence the casts.
		 */
		while ((c = *cp) != '\0') {
			if (isdigit((unsigned char)c)) {
				if ((uint32_t)(c - '0') >= base)
					break;
				val = (val * base) + (c - '0');
				cp++;
				continue;
			}
			if (base == 16 && isxdigit((unsigned char)c)) {
				val = (val << 4) + (c + 10 -
				    (islower((unsigned char)c) ? 'a' : 'A'));
				cp++;
				continue;
			}
			break;
		}
		if (*cp != '.')
			break;
		/*
		 * A part followed by a dot: at most three of those are
		 * allowed, and each is limited to 8 bits.
		 */
		if ((pp >= parts + 3) || (val > 0xff))
			return ((uint32_t)-1);
		*pp++ = val;
		cp++;
	}

	/*
	 * Check for trailing characters.
	 */
	if (*cp != '\0' && !isspace((unsigned char)*cp))
		return ((uint32_t)-1);
	*pp++ = val;

	/*
	 * Concoct the address according to
	 * the number of parts specified.
	 */
	n = pp - parts;
	switch (n) {

	case 1:				/* a -- 32 bits */
		val = parts[0];
		break;

	case 2:				/* a.b -- 8.24 bits */
		if (parts[1] > 0xffffff)
			return ((uint32_t)-1);
		val = (parts[0] << 24) | (parts[1] & 0xffffff);
		break;

	case 3:				/* a.b.c -- 8.8.16 bits */
		if (parts[2] > 0xffff)
			return ((uint32_t)-1);
		val = (parts[0] << 24) | ((parts[1] & 0xff) << 16) |
		    (parts[2] & 0xffff);
		break;

	case 4:				/* a.b.c.d -- 8.8.8.8 bits */
		if (parts[3] > 0xff)
			return ((uint32_t)-1);
		val = (parts[0] << 24) | ((parts[1] & 0xff) << 16) |
		    ((parts[2] & 0xff) << 8) | (parts[3] & 0xff);
		break;

	default:
		return ((uint32_t)-1);
	}
	val = htonl(val);
	return (val);
}
1215
/*
 * Print datalen bytes starting at data as a hex/ASCII dump, 16 bytes per
 * line. Each line shows the decimal offset of its first byte, the bytes
 * as groups of two (printed in memory order, four hex digits per group),
 * and the same bytes as characters with non-printable ones shown as '.'.
 * A short final line is padded so that the character column lines up.
 */
void
hexdump(char *data, int datalen)
{
	/*
	 * Work on unsigned bytes: this avoids 16-bit loads from a buffer
	 * that need not be 16-bit aligned, and keeps the isprint()
	 * argument within the range the function is defined for.
	 */
	const unsigned char *bytes = (const unsigned char *)data;
	const int chunk = 16;  /* 16 bytes per line */
	int off, i, left, len;

	printf("\n");

	for (off = 0; off < datalen; off += chunk) {
		printf("\t%4d: ", off);
		left = datalen - off;
		len = (left < chunk) ? left : chunk;

		/* full byte pairs */
		for (i = 0; i + 1 < len; i += 2) {
			printf("%02x%02x ", bytes[off + i],
			    bytes[off + i + 1]);
		}
		/* odd byte at the very end of the data */
		if (len % 2) {
			printf("%02x   ", bytes[off + len - 1]);
		}
		/* pad out the missing groups of a short line */
		for (i = 0; i < (chunk - left) / 2; i++)
			printf("     ");

		printf("   ");
		for (i = 0; i < len; i++) {
			printf("%c", isprint(bytes[off + i]) ?
			    bytes[off + i] : '.');
		}
		printf("\n");
	}

	printf("\n");
}
1247