1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * Ethernet routines. Includes ARP and Reverse ARP. Used for ethernet-like
31 * media also - so be sure NOT to use ETHERMTU as a mtu limit. macinit()
32 * will set this appropriately.
33 */
34
35#include <sys/types.h>
36#include <socket_impl.h>
37#include <socket_inet.h>
38#include <sys/time.h>
39#include <sys/socket.h>
40#include <net/if.h>
41#include <net/if_arp.h>
42#include <netinet/in_systm.h>
43#include <netinet/in.h>
44#include <netinet/ip.h>
45#include <netinet/if_ether.h>
46#include <sys/promif.h>
47#include <sys/prom_plat.h>
48#include <sys/salib.h>
49#include <sys/bootdebug.h>
50
51#include "ipv4.h"
52#include "ipv4_impl.h"
53#include "mac.h"
54#include "mac_impl.h"
55#include "ethernet_inet.h"
56
/*
 * Ethernet broadcast address (all octets 0xFF). Copied into the destination
 * field of outgoing broadcast frames and ARP requests in this file.
 */
ether_addr_t etherbroadcastaddr = {
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
60
/*
 * Layout of an ARP/RARP frame as sent and received by this file: an
 * ethernet header, the ARP message itself, and trailing padding.
 */
struct arp_packet {
	struct ether_header	arp_eh;		/* ethernet header */
	struct ether_arp	arp_ea;		/* ARP/RARP message */
/* Number of bytes occupied by the header and ARP message (no filler). */
#define	USED_SIZE (sizeof (struct ether_header) + sizeof (struct ether_arp))
	/* pads the ARP message out to ETHERMIN bytes */
	char	filler[ETHERMIN - sizeof (struct ether_arp)];
};
67
68static char *
69ether_print(ether_addr_t ea)
70{
71	static char eprintbuf[20];
72
73	(void) sprintf(eprintbuf, "%x:%x:%x:%x:%x:%x", ea[0], ea[1], ea[2],
74	    ea[3], ea[4], ea[5]);
75	return (eprintbuf);
76}
77
78/*
79 * Common ARP code. Broadcast the packet and wait for the right response.
80 *
81 * If rarp is called for, caller expects an IPv4 address in the target
82 * protocol address (tpa) field of the "out" argument.
83 *
84 * If arp is called for, caller expects a hardware address in the
85 * source hardware address (sha) field of the "out" argument.
86 *
87 * Returns TRUE if transaction succeeded, FALSE otherwise.
88 *
89 * The timeout argument is the number of milliseconds to wait for a
90 * response. An infinite timeout can be specified as 0xffffffff.
91 */
92static int
93ether_comarp(struct arp_packet *out, uint32_t timeout)
94{
95	struct arp_packet *in = (struct arp_packet *)mac_state.mac_buf;
96	int count, time, feedback, len, delay = 2;
97	char    *ind = "-\\|/";
98	struct in_addr tmp_ia;
99	uint32_t wait_time;
100
101	bcopy((caddr_t)etherbroadcastaddr, (caddr_t)&out->arp_eh.ether_dhost,
102	    sizeof (ether_addr_t));
103	bcopy((caddr_t)mac_state.mac_addr_buf,
104	    (caddr_t)&out->arp_eh.ether_shost, sizeof (ether_addr_t));
105
106	out->arp_ea.arp_hrd =  htons(ARPHRD_ETHER);
107	out->arp_ea.arp_pro = htons(ETHERTYPE_IP);
108	out->arp_ea.arp_hln = sizeof (ether_addr_t);
109	out->arp_ea.arp_pln = sizeof (struct in_addr);
110	bcopy(mac_state.mac_addr_buf, (caddr_t)&out->arp_ea.arp_sha,
111	    sizeof (ether_addr_t));
112	ipv4_getipaddr(&tmp_ia);
113	tmp_ia.s_addr = htonl(tmp_ia.s_addr);
114	bcopy((caddr_t)&tmp_ia, (caddr_t)out->arp_ea.arp_spa,
115	    sizeof (struct in_addr));
116	feedback = 0;
117
118	wait_time = prom_gettime() + timeout;
119	for (count = 0; timeout == ~0U || prom_gettime() < wait_time; count++) {
120		if (count == ETHER_WAITCNT) {
121			if (out->arp_ea.arp_op == ARPOP_REQUEST) {
122				bcopy((caddr_t)out->arp_ea.arp_tpa,
123				    (caddr_t)&tmp_ia, sizeof (struct in_addr));
124				printf(
125				    "\nRequesting Ethernet address for: %s\n",
126				    inet_ntoa(tmp_ia));
127			} else {
128				printf("\nRequesting Internet address for %s\n",
129				    ether_print(out->arp_ea.arp_tha));
130			}
131		}
132
133		(void) prom_write(mac_state.mac_dev, (caddr_t)out,
134		    sizeof (*out), 0, NETWORK);
135
136		if (count >= ETHER_WAITCNT)
137			printf("%c\b", ind[feedback++ % 4]); /* activity */
138
139		time = prom_gettime() + (delay * 1000);	/* broadcast delay */
140		while (prom_gettime() <= time) {
141			len = prom_read(mac_state.mac_dev, mac_state.mac_buf,
142			    mac_state.mac_mtu, 0, NETWORK);
143			if (len < USED_SIZE)
144				continue;
145			if (in->arp_ea.arp_pro != ntohs(ETHERTYPE_IP))
146				continue;
147			if (out->arp_ea.arp_op == ntohs(ARPOP_REQUEST)) {
148				if (in->arp_eh.ether_type !=
149				    ntohs(ETHERTYPE_ARP))
150					continue;
151				if (in->arp_ea.arp_op != ntohs(ARPOP_REPLY))
152					continue;
153				if (bcmp((caddr_t)in->arp_ea.arp_spa,
154				    (caddr_t)out->arp_ea.arp_tpa,
155				    sizeof (struct in_addr)) != 0)
156					continue;
157				if (boothowto & RB_VERBOSE) {
158					bcopy((caddr_t)in->arp_ea.arp_spa,
159					    (caddr_t)&tmp_ia,
160					    sizeof (struct in_addr));
161					printf("Found %s @ %s\n",
162					    inet_ntoa(tmp_ia),
163					    ether_print(in->arp_ea.arp_sha));
164				}
165				/* copy hardware addr into "out" for caller */
166				bcopy((caddr_t)&in->arp_ea.arp_sha,
167				    (caddr_t)&out->arp_ea.arp_sha,
168				    sizeof (ether_addr_t));
169				return (TRUE);
170			} else {		/* Reverse ARP */
171				if (in->arp_eh.ether_type !=
172				    ntohs(ETHERTYPE_REVARP))
173					continue;
174				if (in->arp_ea.arp_op != ntohs(REVARP_REPLY))
175					continue;
176				if (bcmp((caddr_t)in->arp_ea.arp_tha,
177				    (caddr_t)out->arp_ea.arp_tha,
178				    sizeof (ether_addr_t)) != 0)
179					continue;
180				if (boothowto & RB_VERBOSE) {
181					bcopy((caddr_t)in->arp_ea.arp_tpa,
182					    (caddr_t)&tmp_ia,
183					    sizeof (struct in_addr));
184					printf("Internet address is: %s\n",
185					    inet_ntoa(tmp_ia));
186				}
187				/* copy IP address into "out" for caller */
188				bcopy((caddr_t)in->arp_ea.arp_tpa,
189				    (caddr_t)out->arp_ea.arp_tpa,
190				    sizeof (struct in_addr));
191				return (TRUE);
192			}
193		}
194
195		delay = delay * 2;	/* Double the request delay */
196		if (delay > 64)		/* maximum delay is 64 seconds */
197			delay = 64;
198	}
199	return (FALSE);
200}
201
202/*
203 * ARP client side
204 * Broadcasts to determine MAC address given network order IP address.
205 * See RFC 826
206 *
207 * Returns TRUE if successful, FALSE otherwise.
208 */
209int
210ether_arp(struct in_addr *ip, void *hap, uint32_t timeout)
211{
212	ether_addr_t *ep = (ether_addr_t *)hap;
213	struct arp_packet out;
214	int result;
215
216	if (!initialized)
217		prom_panic("Ethernet device is not initialized.");
218
219	bzero((char *)&out, sizeof (struct arp_packet));
220
221	out.arp_eh.ether_type = htons(ETHERTYPE_ARP);
222	out.arp_ea.arp_op = htons(ARPOP_REQUEST);
223	bcopy((caddr_t)etherbroadcastaddr, (caddr_t)&out.arp_ea.arp_tha,
224	    sizeof (ether_addr_t));
225	bcopy((caddr_t)ip, (caddr_t)out.arp_ea.arp_tpa,
226	    sizeof (struct in_addr));
227
228	result = ether_comarp(&out, timeout);
229
230	if (result && (ep != NULL)) {
231		bcopy((caddr_t)&out.arp_ea.arp_sha, (caddr_t)ep,
232		    sizeof (ether_addr_t));
233	}
234	return (result);
235}
236
237/*
238 * Reverse ARP client side
239 * Determine our Internet address given our MAC address
240 * See RFC 903
241 */
242void
243ether_revarp(void)
244{
245	struct in_addr	ip;
246	struct arp_packet out;
247
248	if (!initialized)
249		prom_panic("Ethernet device is not initialized.");
250
251	bzero((char *)&out, sizeof (struct arp_packet));
252
253	out.arp_eh.ether_type = htons(ETHERTYPE_REVARP);
254	out.arp_ea.arp_op = htons(REVARP_REQUEST);
255	bcopy(mac_state.mac_addr_buf, (caddr_t)&out.arp_ea.arp_tha,
256	    sizeof (ether_addr_t));
257
258	/* Wait forever */
259	(void) ether_comarp(&out, 0xffffffff);
260
261	bcopy((caddr_t)&out.arp_ea.arp_tpa, (caddr_t)&ip,
262	    sizeof (struct in_addr));
263
264	ip.s_addr = ntohl(ip.s_addr);
265	ipv4_setipaddr(&ip);
266}
267
268/* ARGSUSED */
269int
270ether_header_len(struct inetgram *igm)
271{
272	return (sizeof (struct ether_header));
273}
274
275/*
276 * Handle a IP datagram addressed to our ethernet address or to the
277 * ethernet broadcast address. Also respond to ARP requests. Generates
278 * inetgrams as long as there's data and the mac level IP timeout timer
279 * hasn't expired. As soon as there is no data, we try for
280 * ETHER_INPUT_ATTEMPTS for more, then exit the loop, even if there is time
281 * left, since we expect to have data waiting for us when we're called, we just
282 * don't know how much.
283 *
284 * We workaround slow proms (some proms have hard sleeps for as much as 3msec)
285 * even though there are is data waiting.
286 *
287 * Returns the total number of MEDIA_LVL frames placed on the socket.
288 * Caller is expected to free up the inetgram resources.
289 */
290int
291ether_input(int index)
292{
293	struct inetgram		*inp;
294	struct ether_header	*eh;
295	int		frames = 0;	/* successful frames */
296	int		attempts = 0;	/* failed attempts after success */
297	int16_t		len = 0, data_len;
298	uint32_t	timeout, reltime;
299	uint32_t	pre_pr, post_pr; /* prom_read interval */
300
301#ifdef	DEBUG
302	int		failures = 0;		/* total failures */
303	int		total_attempts = 0;	/* total prom_read */
304	int		no_data = 0;		/* no data in prom */
305	int		arps = 0;		/* arp requests processed */
306	uint32_t	tot_pr = 0;		/* prom_read time */
307	uint32_t	tot_pc = 0;		/* inetgram creation time */
308	uint32_t	pre_pc;
309	uint32_t	now;
310#endif	/* DEBUG */
311
312	if (!initialized)
313		prom_panic("Ethernet device is not initialized.");
314
315	if ((reltime = sockets[index].in_timeout) == 0)
316		reltime = mac_state.mac_in_timeout;
317	timeout = prom_gettime() + reltime;
318
319	do {
320		if (frames > ETHER_MAX_FRAMES) {
321			/* someone is trying a denial of service attack */
322			break;
323		}
324
325		/*
326		 * The following is a workaround for a calvin prom (V2) bug
327		 * where prom_read() returns a nonzero length, even when it's
328		 * not read a packet. So we zero out the header to compensate.
329		 */
330		bzero(mac_state.mac_buf, sizeof (struct ether_header));
331
332		/*
333		 * Prom_read() will return 0 or -2 if no data is present. A
334		 * return value of -1 means an error has occurred. We adjust
335		 * the timeout by calling the time spent in prom_read() "free".
336		 * prom_read() returns the number of bytes actually read, but
337		 * will only copy "len" bytes into our buffer. Adjust in
338		 * case the MTU is wrong.
339		 */
340		pre_pr = prom_gettime();
341		len = prom_read(mac_state.mac_dev, mac_state.mac_buf,
342		    mac_state.mac_mtu, 0, NETWORK);
343		post_pr = prom_gettime();
344		timeout += (post_pr - pre_pr);
345#ifdef	DEBUG
346		tot_pr += (post_pr - pre_pr);
347		total_attempts++;
348#endif	/* DEBUG */
349
350		if (len > mac_state.mac_mtu) {
351			dprintf("ether_input: adjusting MTU %d -> %d\n",
352			    mac_state.mac_mtu, len);
353			bkmem_free(mac_state.mac_buf, mac_state.mac_mtu);
354			mac_state.mac_mtu = len;
355			mac_state.mac_buf = bkmem_alloc(mac_state.mac_mtu);
356			if (mac_state.mac_buf == NULL) {
357				prom_panic("ether_input: Cannot reallocate "
358				    "netbuf memory.");
359			}
360			len = 0; /* pretend there was no data */
361		}
362
363		if (len == -1) {
364#ifdef	DEBUG
365			failures++;
366#endif	/* DEBUG */
367			break;
368		}
369		if (len == 0 || len == -2) {
370			if (frames != 0)
371				attempts++;
372#ifdef	DEBUG
373			no_data++;
374#endif	/* DEBUG */
375			continue;
376		}
377
378		eh = (struct ether_header *)mac_state.mac_buf;
379		if (eh->ether_type == ntohs(ETHERTYPE_IP) &&
380		    len >= (sizeof (struct ether_header) +
381		    sizeof (struct ip))) {
382
383			int offset;
384#ifdef	DEBUG
385			pre_pc = prom_gettime();
386#endif	/* DEBUG */
387
388			inp = (struct inetgram *)bkmem_zalloc(
389			    sizeof (struct inetgram));
390			if (inp == NULL) {
391				errno = ENOMEM;
392				return (frames == 0 ? -1 : frames);
393			}
394			offset = sizeof (struct ether_header);
395			data_len = len - offset;
396			inp->igm_mp = allocb(data_len, 0);
397			if (inp->igm_mp == NULL) {
398				errno = ENOMEM;
399				bkmem_free((caddr_t)inp,
400				    sizeof (struct inetgram));
401				return (frames == 0 ? -1 : frames);
402			}
403			bcopy((caddr_t)(mac_state.mac_buf + offset),
404			    inp->igm_mp->b_rptr, data_len);
405			inp->igm_mp->b_wptr += data_len;
406			inp->igm_level = NETWORK_LVL;
407			add_grams(&sockets[index].inq, inp);
408			frames++;
409			attempts = 0;
410#ifdef	DEBUG
411			tot_pc += prom_gettime() - pre_pc;
412#endif	/* DEBUG */
413			continue;
414		}
415
416		if (eh->ether_type == ntohs(ETHERTYPE_ARP) &&
417		    len >= (sizeof (struct ether_header) +
418		    sizeof (struct ether_arp))) {
419
420			struct in_addr		ip;
421			struct ether_arp	*ea;
422
423#ifdef	DEBUG
424			printf("ether_input: ARP message received\n");
425			arps++;
426#endif	/* DEBUG */
427
428			ea = (struct ether_arp *)(mac_state.mac_buf +
429			    sizeof (struct ether_header));
430			if (ea->arp_pro != ntohs(ETHERTYPE_IP))
431				continue;
432
433			ipv4_getipaddr(&ip);
434			ip.s_addr = ntohl(ip.s_addr);
435
436			if (ea->arp_op == ntohs(ARPOP_REQUEST) &&
437			    ip.s_addr != INADDR_ANY &&
438			    (bcmp((caddr_t)ea->arp_tpa, (caddr_t)&ip,
439			    sizeof (struct in_addr)) == 0)) {
440				ea->arp_op = htons(ARPOP_REPLY);
441				bcopy((caddr_t)ea->arp_sha,
442				    (caddr_t)&eh->ether_dhost,
443				    sizeof (ether_addr_t));
444				bcopy(mac_state.mac_addr_buf,
445				    (caddr_t)&eh->ether_shost,
446				    mac_state.mac_addr_len);
447				bcopy((caddr_t)ea->arp_sha,
448				    (caddr_t)ea->arp_tha,
449				    sizeof (ether_addr_t));
450				bcopy((caddr_t)ea->arp_spa,
451				    (caddr_t)ea->arp_tpa,
452				    sizeof (struct in_addr));
453				bcopy(mac_state.mac_addr_buf,
454				    (caddr_t)ea->arp_sha,
455				    mac_state.mac_addr_len);
456				bcopy((caddr_t)&ip, (caddr_t)ea->arp_spa,
457				    sizeof (struct in_addr));
458				(void) prom_write(mac_state.mac_dev,
459				    mac_state.mac_buf,
460				    sizeof (struct arp_packet),
461				    0, NETWORK);
462				/* don't charge for ARP replies */
463				timeout += reltime;
464			}
465		}
466	} while (attempts < ETHER_INPUT_ATTEMPTS &&
467#ifdef	DEBUG
468		(now = prom_gettime()) < timeout);
469#else
470		prom_gettime() < timeout);
471#endif	/* DEBUG */
472
473#ifdef	DEBUG
474	printf("ether_input(%d): T/S/N/A/F/P/M: %d/%d/%d/%d/%d/%d/%d "
475	    "T/O: %d < %d = %s\n", index, total_attempts, frames, no_data,
476	    arps, failures, tot_pr, tot_pc, now, timeout,
477	    (now < timeout) ? "TRUE" : "FALSE");
478#endif	/* DEBUG */
479	return (frames);
480}
481
482/*
483 * Send out an ethernet datagram. We expect a IP frame appropriately fragmented
484 * at this level.
485 *
486 * Errno is set and -1 is returned if an error occurs. Number of bytes sent
487 * is returned on success.
488 */
489/* ARGSUSED */
490int
491ether_output(int index, struct inetgram *ogp)
492{
493	int			header_len, result;
494	struct ether_header	eh;
495	struct ip		*ip;
496	struct in_addr		tmpip, ipdst, netid;
497	int			broadcast = FALSE;
498	int			size;
499	mblk_t			*mp;
500
501
502#ifdef DEBUG
503	printf("ether_output (%d): size %d\n", index,
504	    ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr);
505#endif
506	if (!initialized)
507		prom_panic("Ethernet device is not initialized.");
508
509	if (ogp->igm_level != MEDIA_LVL) {
510		dprintf("ether_output: frame type wrong: socket: %d\n",
511		    index * SOCKETTYPE);
512		errno = EINVAL;
513		return (-1);
514	}
515
516	header_len = sizeof (struct ether_header);
517	mp = ogp->igm_mp;
518	size = mp->b_wptr - mp->b_rptr;
519	if (size > mac_state.mac_mtu) {
520		dprintf("ether_output: frame size too big: %d\n", size);
521		errno = E2BIG;
522		return (-1);
523	}
524
525	size += header_len;
526	ip = (struct ip *)(mp->b_rptr);
527
528	eh.ether_type = htons(ETHERTYPE_IP);
529	bcopy(mac_state.mac_addr_buf, (caddr_t)&eh.ether_shost,
530	    mac_state.mac_addr_len);
531	bcopy((caddr_t)&ip->ip_dst, (caddr_t)&ipdst, sizeof (ipdst));
532
533	if (ipdst.s_addr == htonl(INADDR_BROADCAST))
534		broadcast = TRUE; /* limited broadcast */
535
536	if (!broadcast) {
537		struct in_addr mask;
538
539		ipv4_getnetid(&netid);
540		ipv4_getnetmask(&mask);
541		mask.s_addr = htonl(mask.s_addr);
542		netid.s_addr = htonl(netid.s_addr);
543
544		/*
545		 * check for all-hosts directed broadcast for
546		 * to its own subnet.
547		 */
548		if (mask.s_addr != htonl(INADDR_BROADCAST) &&
549		    (ipdst.s_addr & ~mask.s_addr) == 0 &&
550		    (ipdst.s_addr & mask.s_addr) ==  netid.s_addr) {
551			broadcast = TRUE; /* directed broadcast */
552		} else {
553			if (ogp->igm_router.s_addr != htonl(INADDR_ANY))
554				tmpip.s_addr = ogp->igm_router.s_addr;
555			else
556				tmpip.s_addr = ipdst.s_addr;
557
558			result = mac_get_arp(&tmpip, (void *)&eh.ether_dhost,
559			    sizeof (ether_addr_t), mac_state.mac_arp_timeout);
560			if (!result) {
561				errno = ETIMEDOUT;
562				dprintf("ether_output: ARP request for %s "
563				    "timed out.\n", inet_ntoa(tmpip));
564				return (-1);
565			}
566		}
567	}
568
569	if (broadcast) {
570		bcopy((caddr_t)etherbroadcastaddr,
571		    (caddr_t)&eh.ether_dhost, sizeof (ether_addr_t));
572	}
573
574	/* add the ethernet header */
575	mp->b_rptr -= sizeof (eh);
576	bcopy((caddr_t)&eh, mp->b_rptr, sizeof (eh));
577#ifdef	DEBUG
578	printf("ether_output(%d): level(%d) frame(0x%x) len(%d)\n",
579	    index, ogp->igm_level, mp->b_rptr, size);
580#if DEBUG > 1
581	printf("Dump ethernet (%d): \n", size);
582	hexdump((char *)mp->b_rptr, size);
583	printf("\n");
584#endif /* DEBUG > 1 */
585#endif	/* DEBUG */
586	return (prom_write(mac_state.mac_dev, (char *)mp->b_rptr, size,
587	    0, NETWORK));
588}
589