1/*
2 * Copyright (c) 2014, University of Washington.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, CAB F.78, Universitaetstr. 6, CH-8092 Zurich.
8 * Attn: Systems Group.
9 */
10
11#include <stdio.h>
12#include <assert.h>
13#include <barrelfish/barrelfish.h>
14#include <barrelfish/inthandler.h>
15#include <barrelfish/sys_debug.h>
16#include <skb/skb.h>
17#include <sys/socket.h>
18#include <netif/e1000.h>
19#include <limits.h>
20#include <barrelfish/waitset.h>
21#include <barrelfish/waitset_chan.h>
22#include <barrelfish/nameservice_client.h>
23#include <lwip/sock_chan_support.h>
24#include <netdb.h>
25#include <arranet.h>
26#include <arranet_impl.h>
27#include <acpi_client/acpi_client.h>
28#include <arranet_debug.h>
29#include <if/subways_defs.h>
30#include <if/monitor_defs.h>
31
32/* #undef DEFAULT_UMP_BUFLEN */
33/* #define DEFAULT_UMP_BUFLEN  ((100 * BASE_PAGE_SIZE) / 2 / UMP_MSG_BYTES * UMP_MSG_BYTES) */
34
35static ether_terminate_queue ether_terminate_queue_ptr = NULL;
36static ether_get_mac_address_t ether_get_mac_address_ptr = NULL;
37static ether_transmit_pbuf_list_t ether_transmit_pbuf_list_ptr = NULL;
38static ether_get_tx_free_slots tx_free_slots_fn_ptr = NULL;
39static ether_handle_free_TX_slot handle_free_tx_slot_fn_ptr = NULL;
40static ether_rx_register_buffer rx_register_buffer_fn_ptr = NULL;
41static ether_rx_get_free_slots rx_get_free_slots_fn_ptr = NULL;
42
43uint64_t interrupt_counter = 0;
44uint64_t total_rx_p_count = 0;
45uint64_t total_rx_datasize = 0;
46struct client_closure *g_cl = NULL;
47
48#define MAX_PACKETS     2000
49#define PACKET_SIZE     2048
50#define MAX_PEERS       256
51#define PRINT_PERIOD    (1 * 1000 * 1000)       // us
52#define CYCLES_PER_S   2200000000ULL
53#define PRINT_UTIL
54
55static int use_vtd = 0;
56static int vtd_coherency = 1;
57static bool receiver = false;
58static struct waitset_chanstate recv_chanstate;
59static struct subways_binding *subways_binding = NULL;
60
61struct peer {
62    uint32_t ip;
63    struct eth_addr mac;
64};
65
66// Configure static ARP entries here
67// IP addresses are in network byte order!
68static struct peer peers[MAX_PEERS] = {
69    {
70        // XXX: This needs to be updated each time the tap interface is re-initialized
71        .ip = 0x0102000a,       // 10.0.2.1
72        /* .mac.addr = "\x86\x86\x0b\xda\x22\xd7", */
73        .mac.addr = "\x12\x67\xb9\x3e\xe2\x2c",
74    },
75    {
76        // XXX: This needs to be updated each time the tap interface is re-initialized
77        .ip = 0x0164a8c0,       // 192.168.100.1
78        .mac.addr = "\x5e\x93\xf2\xf1\xeb\xfa",
79    },
80    {
81        .ip = 0xaf06d080,       // 128.208.6.175 - swingout2
82        .mac.addr = "\x90\xe2\xba\x3a\x2e\xdd",
83    },
84    {
85        .ip = 0xec06d080,       // 128.208.6.236 - swingout3
86        .mac.addr = "\xa0\x36\x9f\x0f\xfb\xe2",
87    },
88    {
89        .ip = 0x8106d080,       // 128.208.6.129 - swingout4
90        .mac.addr = "\xa0\x36\x9f\x10\x01\x6e",
91    },
92    {
93        .ip = 0x8206d080,       // 128.208.6.130 - swingout5
94        .mac.addr = "\xa0\x36\x9f\x10\x00\xa2",
95    },
96    {
97        .ip = 0xc506d080,       // 128.208.6.197 - swingout6
98        .mac.addr = "\xa0\x36\x9f\x10\x03\x52",
99    },
100    {
101        .ip = 0x3706000a,       // 10.0.6.55 - swingout5-e10k2
102        .mac.addr = "\xa0\x36\x9f\x10\x00\xa0",
103    },
104};
105static int peers_alloc = 8;             // Set number of static ARP here!
106
107struct pkt_ip_headers {
108    struct eth_hdr eth;
109    struct ip_hdr ip;
110} __attribute__ ((packed));
111
112struct pkt_udp_headers {
113    struct eth_hdr eth;
114    struct ip_hdr ip;
115    struct udp_hdr udp;
116} __attribute__ ((packed));
117
118struct pkt_tcp_headers {
119    struct eth_hdr eth;
120    struct ip_hdr ip;
121    struct tcp_hdr tcp;
122} __attribute__ ((packed));
123
124static struct packet rx_packets[MAX_PACKETS];
125
126/******** IP config *********/
127
128struct mac2ip {
129    uint8_t mac[ETHARP_HWADDR_LEN];
130    uint32_t ip;
131};
132
133static struct mac2ip ip_config[] = {
134    {   // QEMU
135        .mac = "\x52\x54\x00\x12\x34\x56",
136        /* .ip = 0x0a00020f,       // 10.0.2.15 */
137        .ip = 0xc0a8640f,       // 192.168.100.15
138    },
139    {
140        // QEMU2
141        .mac = "\x52\x54\x00\x12\x34\x57",
142        .ip = 0xc0a80102,       // 192.168.1.2
143    },
144    {   // swingout1 (and swingout1-vf0)
145        .mac = "\xa0\x36\x9f\x10\x00\xa6",
146        .ip = 0x80d00643,       // 128.208.6.67
147    },
148    {   // swingout1-vf1
149        .mac = "\x22\xc9\xfc\x96\x83\xfc",
150        .ip = 0x80d00644,       // 128.208.6.68
151    },
152    {   // swingout1-vf2
153        .mac = "\xce\x43\x5b\xf7\x3e\x60",
154        .ip = 0x80d00602,       // 128.208.6.2
155    },
156    {   // swingout1-vf3
157        .mac = "\x6a\xb0\x62\xf6\xa7\x21",
158        .ip = 0x80d00603,       // 128.208.6.3
159    },
160    {   // swingout1-vf4
161        .mac = "\xb2\xdf\xf9\x39\xc6\x10",
162        .ip = 0x80d00604,       // 128.208.6.4
163    },
164    {   // swingout1-vf5
165        .mac = "\x92\x77\xe7\x3f\x80\x30",
166        .ip = 0x80d0060c,       // 128.208.6.12
167    },
168    {   // swingout5
169        .mac = "\xa0\x36\x9f\x10\x00\xa2",
170        .ip = 0x80d00682,       // 128.208.6.130
171    },
172    {   // swingout5-e10k2
173        .mac = "\xa0\x36\x9f\x10\x00\xa0",
174        .ip = 0x0a000637,       // 10.0.6.55
175    },
176};
177
178static uint8_t arranet_mymac[ETHARP_HWADDR_LEN];
179static uint32_t arranet_myip = 0;
180
181void ethernetif_backend_init(char *service_name, uint64_t queueid,
182                             ether_get_mac_address_t get_mac_ptr,
183                             ether_terminate_queue terminate_queue_ptr,
184                             ether_transmit_pbuf_list_t transmit_ptr,
185                             ether_get_tx_free_slots tx_free_slots_ptr,
186                             ether_handle_free_TX_slot handle_free_tx_slot_ptr,
187                             size_t rx_bufsz,
188                             ether_rx_register_buffer rx_register_buffer_ptr,
189                             ether_rx_get_free_slots rx_get_free_slots_ptr)
190{
191    ether_terminate_queue_ptr = terminate_queue_ptr;
192    ether_get_mac_address_ptr = get_mac_ptr;
193    ether_transmit_pbuf_list_ptr = transmit_ptr;
194    tx_free_slots_fn_ptr = tx_free_slots_ptr;
195    handle_free_tx_slot_fn_ptr = handle_free_tx_slot_ptr;
196    rx_register_buffer_fn_ptr = rx_register_buffer_ptr;
197    rx_get_free_slots_fn_ptr = rx_get_free_slots_ptr;
198    /* printf("PBUF_POOL_BUFSIZE = %u, rx buffer size = %zu\n", PBUF_POOL_BUFSIZE, */
199    /*        rx_bufsz); */
200}
201
202#define MAX_DRIVER_BUFS         16
203
204static genpaddr_t rx_pbase = 0, tx_pbase = 0, packetring_pbase = 0;
205static genvaddr_t rx_vbase = 0, tx_vbase = 0, packetring_vbase = 0;
206static struct capref packetring_frame;
207
208static struct packet tx_packets[MAX_PACKETS];
209/* static uint8_t tx_bufs[MAX_PACKETS][PACKET_SIZE]; */
210static unsigned int tx_idx = 0;
211/* static ssize_t tx_packets_available = MAX_PACKETS; */
212
213#include <barrelfish/deferred.h>
214
215static void packet_output(struct packet *p)
216{
217    struct driver_buffer bufs[MAX_DRIVER_BUFS];
218    int n = 0;
219
220    for (struct packet *q = p; q != NULL; q = q->next) {
221        struct driver_buffer *buf = &bufs[n];
222
223        /* if(q->payload < &tx_bufs[0][0] || q->payload >= &tx_bufs[MAX_PACKETS][PACKET_SIZE]) { */
224        /*     printf("Called from %p %p\n", */
225        /*            __builtin_return_address(0), */
226        /*            __builtin_return_address(1)); */
227        /*     assert(q->payload >= &tx_bufs[0][0] && q->payload < &tx_bufs[MAX_PACKETS][PACKET_SIZE]); */
228        /* } */
229
230        /* Send the data from the pbuf to the interface, one pbuf at a
231           time. The size of the data in each pbuf is kept in the ->len
232           variable. */
233        assert(q->len > 0);
234
235        // Check if it's from the RX region
236        /* printf("RX region: Comparing %p against [%p:%p]\n", */
237        /*        q->payload, */
238        /*        (void *)rx_vbase, */
239        /*        (void *)(rx_vbase + (MAX_PACKETS * PACKET_SIZE + 4096))); */
240	if (!use_vtd) {
241	    if(((genvaddr_t)q->payload) >= packetring_vbase &&
242	       ((genvaddr_t)q->payload) < packetring_vbase + (MAX_PACKETS * PACKET_SIZE + 4096)) {
243	        buf->pa = packetring_pbase + ((genvaddr_t)q->payload - packetring_vbase);
244	    } else if(((genvaddr_t)q->payload) >= rx_vbase &&
245	       ((genvaddr_t)q->payload) < rx_vbase + (MAX_PACKETS * PACKET_SIZE + 4096)) {
246	        buf->pa = rx_pbase + ((genvaddr_t)q->payload - rx_vbase);
247	    } else if(((genvaddr_t)q->payload) >= tx_vbase &&
248		      ((genvaddr_t)q->payload) < tx_vbase + (MAX_PACKETS * PACKET_SIZE)) {
249	        // It is from the TX region!
250	        buf->pa = tx_pbase + ((genvaddr_t)q->payload - tx_vbase);
251	    } else {
252	        // Check if it's in morecore's region
253	        struct morecore_state *mc_state = get_morecore_state();
254		struct vspace_mmu_aware *mmu_state = &mc_state->mmu_state;
255		genvaddr_t base = vregion_get_base_addr(&mmu_state->vregion);
256		struct memobj_frame_list *i;
257
258		// Walk frame list
259		for(i = mmu_state->memobj.frame_list; i != NULL; i = i->next) {
260		    // If address is completely within frame, we can resolve
261		    // XXX: Everything else would be easier with an IOMMU
262		    /* printf("Heap: Comparing [%p:%p] against [%p:%p]\n", */
263		    /*        q->payload, q->payload + q->len, */
264		    /*        (void *)(base + i->offset), */
265		    /*        (void *)(base + i->offset + i->size)); */
266		    if(base + i->offset <= (genvaddr_t)q->payload &&
267		       ((genvaddr_t)q->payload) + q->len < base + i->offset + i->size) {
268		        assert(i->pa != 0);
269
270		        /* buf->pa = id.base + ((genvaddr_t)q->payload - base - i->offset); */
271			buf->pa = i->pa + ((genvaddr_t)q->payload - base - i->offset);
272			break;
273		    }
274		}
275
276		if(i == NULL) {
277		    // Check if it's in text/data region
278		    int entry;
279		    for(entry = 0; entry < mc_state->v2p_entries; entry++) {
280		        struct v2pmap *pmap = &mc_state->v2p_mappings[entry];
281
282			// If address is completely within frame, we can resolve
283			// XXX: Everything else would be easier with an IOMMU
284			/* printf("BSS: Comparing [%p:%p] against [%p:%p]\n", */
285			/*        q->payload, q->payload + q->len, */
286			/*        (void *)(pmap->va), */
287			/*        (void *)(pmap->va + pmap->size)); */
288			if(pmap->va <= (genvaddr_t)q->payload &&
289			   ((genvaddr_t)q->payload) + q->len < pmap->va + pmap->size) {
290			    buf->pa = pmap->pa + ((genvaddr_t)q->payload - pmap->va);
291			    break;
292			}
293		    }
294
295		    if(entry == mc_state->v2p_entries) {
296		        printf("Called from %p %p %p\n",
297			       __builtin_return_address(0),
298			       __builtin_return_address(1),
299			       __builtin_return_address(2));
300
301			USER_PANIC("Invalid pbuf! payload = %p, pa = %p, subpacket = %d\n",
302				   q->payload, buf->pa, n);
303		    }
304		}
305	    }
306	} else {
307            printf("Using VT-d to send this packet\n");
308        }
309
310        /* printf("Sending: '%s'\n", (char *)q->payload); */
311
312        buf->va = q->payload;
313        buf->len = q->len;
314/* #ifndef SENDMSG_WITH_COPY */
315/*         buf->opaque = q->opaque; */
316/* #else */
317        buf->opaque = q;
318/* #endif */
319        buf->flags = q->flags;
320
321        n++;
322    }
323
324    errval_t err = ether_transmit_pbuf_list_ptr(bufs, n);
325    assert(err_is_ok(err));
326}
327
328static struct pkt_ip_headers packet_ip_header;
329static struct pkt_udp_headers packet_udp_header;
330static struct pkt_tcp_headers packet_tcp_header;
331
332static struct peer *peers_get_from_ip(uint32_t ip)
333{
334    for(int i = 0; i < MAX_PEERS; i++) {
335        if(ip == peers[i].ip) {
336            return &peers[i];
337        }
338    }
339
340    /* printf("NOT FOUND: %x\n", ip); */
341
342    return NULL;
343}
344
345static struct peer *peers_get_next_free(void)
346{
347    if(peers_alloc < MAX_PEERS) {
348        return &peers[peers_alloc++];
349    } else {
350        return NULL;
351    }
352}
353
354static struct packet *get_tx_packet(void)
355{
356    struct packet *p = &tx_packets[tx_idx];
357
358    // Busy-wait until packet not in flight
359    while(p->len != 0) {
360        /* printf("Pipeline stalled! tx_packets_available = %zd\n", tx_packets_available); */
361        printf("Pipeline stalled!\n");
362        handle_free_tx_slot_fn_ptr();
363        /* if(!handle_free_tx_slot_fn_ptr()) { */
364        /*     printf("No packets could be freed!\n"); */
365        /* } */
366    }
367
368    /* tx_packets_available--; */
369
370    tx_idx = (tx_idx + 1) % MAX_PACKETS;
371    return p;
372}
373
374static bool packet_sent;
375
376struct send_packet_args {
377    genvaddr_t  p;
378    uint32_t    len;
379    uint64_t	pkt;
380};
381
382static void send_packet(void *arg)
383{
384    struct send_packet_args *a = arg;
385
386    printf("Sending packet finally\n");
387
388    errval_t err =
389      subways_binding->tx_vtbl.send(subways_binding, NOP_CONT, a->p, a->len, a->pkt);
390    assert(err_is_ok(err));
391
392    packet_sent = true;
393}
394
395void process_received_packet(struct driver_rx_buffer *buffer, size_t count,
396                             uint64_t flags)
397{
398    struct packet *p = buffer->opaque;
399    assert(p != NULL);
400    assert(count == 1);
401    p->len = buffer->len;
402
403    /* printf("Got %p from driver\n", p); */
404
405    if(receiver) {
406      // Don't receive packets if we're the subways receiver
407      goto out;
408    }
409
410    /* if(p < rx_packets || p > &rx_packets[MAX_PACKETS]) { */
411    /*     printf("%d: rx_packets = %p, rx_packets[MAX_PACKETS] = %p, p = %p\n", */
412    /*            disp_get_core_id(), */
413    /*            rx_packets, &rx_packets[MAX_PACKETS], p); */
414    /* } */
415
416    assert(p >= rx_packets && p < &rx_packets[MAX_PACKETS]);
417
418    // Drop packets with invalid checksums
419    if(flags & NETIF_RXFLAG_IPCHECKSUM) {
420        if(!(flags & NETIF_RXFLAG_IPCHECKSUM_GOOD)) {
421            goto out;
422        }
423    }
424
425    if(flags & NETIF_RXFLAG_L4CHECKSUM) {
426        if(!(flags & NETIF_RXFLAG_L4CHECKSUM_GOOD)) {
427            goto out;
428        }
429    }
430
431    struct eth_hdr *ethhdr = (struct eth_hdr *)p->payload;
432    switch (htons(ethhdr->type)) {
433    case ETHTYPE_ARP:
434        {
435            struct etharp_hdr *arphdr = (struct etharp_hdr *)(p->payload + SIZEOF_ETH_HDR);
436            uint32_t dipaddr = (arphdr->dipaddr.addrw[1] << 16) | arphdr->dipaddr.addrw[0];
437
438            /* printf("%d: ARP request, dip = %x\n", disp_get_core_id(), dipaddr); */
439
440            if(htons(arphdr->opcode) == ARP_REQUEST &&
441               dipaddr == arranet_myip) {
442                // Send reply
443                struct packet outp;
444		// XXX: Static payload! Need to lock if multithreaded!
445                static uint8_t payload[PACKET_SIZE];
446                struct eth_hdr *myeth = (struct eth_hdr *)payload;
447                struct etharp_hdr *myarp = (struct etharp_hdr *)(payload + SIZEOF_ETH_HDR);
448
449                /* printf("%d: ARP request for us!\n", disp_get_core_id()); */
450
451                // ETH header
452                memcpy(&myeth->dest, &arphdr->shwaddr, ETHARP_HWADDR_LEN);
453                memcpy(&myeth->src, arranet_mymac, ETHARP_HWADDR_LEN);
454                myeth->type = htons(ETHTYPE_ARP);
455
456                // ARP header
457                myarp->hwtype = htons(1);
458                myarp->proto = htons(ETHTYPE_IP);
459                myarp->hwlen = 6;
460                myarp->protolen = 4;
461                myarp->opcode = htons(ARP_REPLY);
462                memcpy(&myarp->shwaddr, arranet_mymac, ETHARP_HWADDR_LEN);
463                memcpy(&myarp->sipaddr, &arphdr->dipaddr, sizeof(myarp->sipaddr));
464                memcpy(&myarp->dhwaddr, &arphdr->shwaddr, ETHARP_HWADDR_LEN);
465                memcpy(&myarp->dipaddr, &arphdr->sipaddr, sizeof(myarp->dipaddr));
466
467                outp.payload = payload;
468                outp.len = SIZEOF_ETHARP_PACKET;
469                /* outp.len = p->len; */
470                outp.next = NULL;
471                outp.flags = 0;
472                outp.opaque = NULL;
473
474                packet_output(&outp);
475		static int arp_count = 0;
476		arp_count++;
477		if(arp_count > 100) {
478		  printf("High ARP count!\n");
479		}
480                while(!e1000n_queue_empty()) thread_yield();
481            }
482        }
483        break;
484
485    case ETHTYPE_IP:
486        {
487            struct ip_hdr *iphdr = (struct ip_hdr *)(p->payload + SIZEOF_ETH_HDR);
488
489            // Only if receiver is bound
490            if(subways_binding == NULL) {
491                break;
492            }
493
494	    if(IPH_PROTO(iphdr) != IP_PROTO_IPENCAP) {
495	      break;
496	    }
497
498            /* if(p < rx_packets || p > &rx_packets[MAX_PACKETS]) { */
499            /*     printf("%d: inner process_received_packet: rx_packets = %p, rx_packets[MAX_PACKETS] = %p, p = %p\n", */
500            /*            disp_get_core_id(), */
501            /*            rx_packets, &rx_packets[MAX_PACKETS], p); */
502            /* } */
503
504            /* printf("%d: Forwarding, opaque %p\n", disp_get_core_id(), p); */
505
506            // Forward packet
507	    errval_t err =
508	      subways_binding->tx_vtbl.send(subways_binding, NOP_CONT,
509					    (genvaddr_t)p->payload - rx_vbase,
510					    p->len,
511					    (uint64_t)p);
512	    if(err_is_fail(err)) {
513	      if(err_no(err) == FLOUNDER_ERR_TX_BUSY) {
514		printf("%d: Busy! retrying...\n", disp_get_core_id());
515
516		packet_sent = false;
517
518		// Process inter-subways events
519		struct waitset subways_ws;
520		waitset_init(&subways_ws);
521		errval_t err2 =
522		  subways_binding->change_waitset(subways_binding,
523						  &subways_ws);
524		assert(err_is_ok(err2));
525
526		struct monitor_binding *mb = get_monitor_binding();
527		assert(mb != NULL);
528		err2 = mb->change_waitset(mb, &subways_ws);
529		assert(err_is_ok(err2));
530
531                static struct send_packet_args args;
532                args.p = (genvaddr_t)p->payload - rx_vbase;
533                args.len = p->len;
534		args.pkt = (uint64_t)p;
535
536		struct event_closure txcont = MKCONT(send_packet, &args);
537		err2 =
538		  subways_binding->register_send(subways_binding,
539						 &subways_ws,
540						 txcont);
541		if (err_is_fail(err2)) {
542		  DEBUG_ERR(err2, "register_send on binding failed!");
543		}
544		assert(err_is_ok(err2));
545
546		while(!packet_sent) {
547		  /* printf("%d: In loop\n", disp_get_core_id()); */
548		  event_dispatch(&subways_ws);
549		}
550
551		/* printf("%d: done with loop\n", disp_get_core_id()); */
552
553		err2 = subways_binding->change_waitset(subways_binding,
554						       get_default_waitset());
555		assert(err_is_ok(err2));
556		err2 = mb->change_waitset(mb, get_default_waitset());
557		assert(err_is_ok(err2));
558	      } else {
559		DEBUG_ERR(err, "sending packet");
560		assert(err_is_ok(err));
561	      }
562	    }
563
564            // ARP management
565            if(peers_get_from_ip(iphdr->src.addr) == NULL) {
566                struct peer *newpeer = peers_get_next_free();
567                assert(p != NULL);
568
569                newpeer->ip = iphdr->src.addr;
570                memcpy(&newpeer->mac.addr, &ethhdr->src.addr, ETHARP_HWADDR_LEN);
571            }
572
573            // Trigger channel, so we keep polling
574            if (waitset_chan_is_registered(&recv_chanstate)) {
575                err = waitset_chan_trigger(&recv_chanstate);
576                assert(err_is_ok(err));
577            }
578
579	    // Don't re-register yet
580	    return;
581        }
582        break;
583
584    default:
585        break;
586    }
587
588 out:
589    {
590        //now we have consumed the preregistered pbuf containing a received packet
591        //which was processed in this function. Therefore we have to register a new
592        //free buffer for receiving packets.
593        errval_t err = rx_register_buffer_fn_ptr(p->pa, p->payload, p);
594        assert(err_is_ok(err));
595    }
596}
597
598bool handle_tx_done(void *opaque)
599{
600    struct packet *p = opaque;
601
602    /* printf("%d: handle_tx_done, opaque = %p\n", */
603    /*        disp_get_core_id(), p->opaque); */
604
605    if(receiver) {
606        assert(p >= tx_packets && p < &tx_packets[MAX_PACKETS]);
607        /* if(p >= tx_packets && p < &tx_packets[MAX_PACKETS]) { */
608        /*     /\* printf("Packet from TX ring, marking available\n"); *\/ */
609        /*     // Mark packet as available, if coming from TX packet array */
610        p->len = 0;
611        /*     /\* tx_packets_available++; *\/ */
612        /* } else { */
613        assert(p->opaque != NULL);
614        // Send back to sender
615        assert(subways_binding != NULL);
616        errval_t err =
617            subways_binding->tx_vtbl.tx_done(subways_binding, NOP_CONT,
618                                             (uint64_t)p->opaque);
619        if(err_is_fail(err)) {
620            DEBUG_ERR(err, "tx_done");
621        }
622        assert(err_is_ok(err));
623        /* } */
624    } else {
625        // TODO: Re-register?
626    }
627
628    return true;
629}
630
631/* allocate a single frame, mapping it into our vspace with given attributes */
632static void *alloc_map_frame(vregion_flags_t attr, size_t size, struct capref *retcap)
633{
634    struct capref frame;
635    errval_t r;
636
637    r = frame_alloc(&frame, size, NULL);
638    assert(err_is_ok(r));
639    void *va;
640    r = vspace_map_one_frame_attr(&va, size, frame, attr,
641                                  NULL, NULL);
642    if (err_is_fail(r)) {
643        DEBUG_ERR(r, "vspace_map_one_frame failed");
644        return NULL;
645    }
646
647    if (retcap != NULL) {
648        *retcap = frame;
649    }
650
651    return va;
652}
653
/* NULL-terminated list of command-line option prefixes. */
static const char *eat_opts[] = {
    "function=",
    "interrupts=",
    "queue=",
    "msix=",
    "vf=",
    "device=",
    "bus=",
    "use_vtd=",
    NULL
};
658
659static void subways_startup(struct subways_binding *b,
660                            struct capref frame)
661{
662    struct frame_identity id;
663    void *va;
664
665    /* printf("startup! Mapping packetring cap\n"); */
666
667    errval_t err = invoke_frame_identify(frame, &id);
668    assert(err_is_ok(err));
669    err = vspace_map_one_frame_attr(&va, 1 << id.bits, frame,
670                                    VREGION_FLAGS_READ_WRITE,
671                                    NULL, NULL);
672    assert(err_is_ok(err));
673
674    packetring_vbase = (genvaddr_t)va;
675    packetring_pbase = id.base;
676}
677
678static void subways_send(struct subways_binding *b, genvaddr_t offset,
679			 uint32_t len, uint64_t opaque)
680{
681    uint8_t *p = (uint8_t *)(packetring_vbase + offset);
682
683    /* printf("subways_send: offset = %" PRIxGENVADDR ", addr = %p, len = %u\n", */
684    /*        offset, p, len); */
685
686    /* printf("%d: subways_send: p = %p\n", */
687    /*        disp_get_core_id(), (void *)opaque); */
688
689    struct eth_hdr *ethhdr = (struct eth_hdr *)p;
690    struct ip_hdr *iphdr = (struct ip_hdr *)(p + SIZEOF_ETH_HDR);
691
692    uint8_t ttl = IPH_TTL(iphdr);
693    if(ttl == 1) {
694        /* printf("TTL = 1\n"); */
695
696        // Decapsulate
697        unsigned int outer_len = IPH_HL(iphdr) * 4;
698        void *inner_iphdr = p + SIZEOF_ETH_HDR + outer_len;
699        memcpy(iphdr, inner_iphdr, len - SIZEOF_ETH_HDR - outer_len);
700        len -= outer_len;
701    } else {
702        // Decrement TTL
703        /* printf("TTL = %u > 1\n", ttl); */
704        IPH_TTL_SET(iphdr, ttl - 1);
705
706        // Flip IP source and dest, recompute IP checksum
707        uint32_t tmpip = iphdr->dest.addr;
708        iphdr->dest.addr = iphdr->src.addr;
709        iphdr->src.addr = tmpip;
710        /* iphdr->_chksum = 0; */
711    }
712
713    // XXX: Asserting it's an IP packet to forward
714    struct packet *outp = get_tx_packet();
715
716    // TODO: Insert next-hop MAC instead of sending it back
717    memcpy(&ethhdr->dest, &ethhdr->src, ETHARP_HWADDR_LEN);
718    memcpy(&ethhdr->src, arranet_mymac, ETHARP_HWADDR_LEN);
719    /* memcpy(&myeth->src, &ethhdr->dest, ETHARP_HWADDR_LEN); */
720    /* myeth->type = htons(ETHTYPE_IP); */
721
722    outp->len = len;
723    outp->next = NULL;
724    /* outp->flags = NETIF_TXFLAG_IPCHECKSUM; */
725    outp->flags = 0;
726    outp->payload = p;
727    outp->opaque = (void *)opaque;
728    packet_output(outp);
729
730    /* switch (htons(ethhdr->type)) { */
731    /* case ETHTYPE_IP: */
732    /*     { */
733    /*         struct ip_hdr *iphdr = (struct ip_hdr *)(p + SIZEOF_ETH_HDR); */
734
735    /*         printf("%d: Is an IP packet, type %x\n", disp_get_core_id(), IPH_PROTO(iphdr)); */
736    /*     } */
737    /*     break; */
738
739    /* default: */
740    /*     printf("Unknown packet!\n"); */
741    /*     break; */
742    /* } */
743}
744
745static void subways_tx_done(struct subways_binding *b, uint64_t opaque)
746{
747  struct packet *p = (void *)opaque;
748  assert(p != NULL);
749
750  /* printf("%d: subways_tx_done, opaque = %p\n", disp_get_core_id(), p); */
751
752  /* if(p < rx_packets || p > &rx_packets[MAX_PACKETS]) { */
753  /*     printf("%d: subways_tx_done: rx_packets = %p, rx_packets[MAX_PACKETS] = %p, p = %p\n", */
754  /*            disp_get_core_id(), */
755  /*            rx_packets, &rx_packets[MAX_PACKETS], p); */
756  /* } */
757
758  assert(p >= rx_packets && p < &rx_packets[MAX_PACKETS]);
759
760  errval_t err = rx_register_buffer_fn_ptr(p->pa, p->payload, p);
761  assert(err_is_ok(err));
762}
763
764static struct subways_rx_vtbl subways_rx_vtbl = {
765    .startup = subways_startup,
766    .send = subways_send,
767    .tx_done = subways_tx_done,
768};
769
770static void subways_export_cb(void *st, errval_t err, iref_t iref)
771{
772    err = nameservice_register("subways_sender", iref);
773    assert(err_is_ok(err));
774    /* printf("subways interface exported\n"); */
775}
776
777static errval_t subways_connect_cb(void *st, struct subways_binding *b)
778{
779    /* printf("New connection on subways interface\n"); */
780    b->rx_vtbl = subways_rx_vtbl;
781    subways_binding = b;
782    return SYS_ERR_OK;
783}
784
785static void subways_bind_cb(void *st, errval_t err, struct subways_binding *b)
786{
787    assert(err_is_ok(err));
788    b->rx_vtbl = subways_rx_vtbl;
789
790    /* printf("Bound to subways receiver -- sending packet ring frame\n"); */
791
792    // Transfer packet ring
793    errval_t r = b->tx_vtbl.startup(b, NOP_CONT, packetring_frame);
794    assert(err_is_ok(r));
795
796    subways_binding = b;
797}
798
799static void do_nothing(void *arg)
800{
801    /* printf("Channel fired!\n"); */
802    errval_t err = waitset_chan_register_polled(get_default_waitset(),
803                                                &recv_chanstate,
804                                                MKCLOSURE(do_nothing, NULL));
805    assert(err_is_ok(err));
806}
807
/* Cycles spent inside arranet_polling_loop(); reset by print_cpu_util(). */
static uint64_t polling_cycles = 0;

void arranet_polling_loop_proxy(void);

/**
 * Run arranet_polling_loop() and add the cycles it took to polling_cycles.
 */
void arranet_polling_loop_proxy(void)
{
    const uint64_t begin = rdtsc();

    arranet_polling_loop();

    const uint64_t end = rdtsc();
    polling_cycles += end - begin;
}
817
#ifdef PRINT_UTIL
/* Cycle counters defined outside this file. */
extern uint64_t closure_cycles, wait_cycles;

/**
 * Print how the time since the previous call was spent, then reset the
 * cycle counters. The first call only records the current timestamp.
 *
 * Cycle counts are converted to seconds using CYCLES_PER_S.
 *
 * \param dummy  Unused.
 */
static void print_cpu_util(void *dummy)
{
    static uint64_t prev_tsc = 0;

    const uint64_t now = rdtsc();
    const uint64_t before = prev_tsc;

    prev_tsc = now;

    if (before == 0) {
        // Nothing to compare against yet
        return;
    }

    uint64_t elapsed = now - before;

    printf("%d: Time elapsed %.2fs, %.2fs in polling, %.2fs in closures, "
           "%.2fs waiting, CPU util %.2f%%\n",
           disp_get_core_id(),
           (float)elapsed / CYCLES_PER_S,
           (float)polling_cycles / CYCLES_PER_S,
           (float)closure_cycles / CYCLES_PER_S,
           (float)wait_cycles / CYCLES_PER_S,
           (((float)polling_cycles + closure_cycles) / elapsed) * 100.0);

    // Start a fresh measurement interval
    wait_cycles = 0;
    closure_cycles = 0;
    polling_cycles = 0;
}
#endif
846
847int main(int argc, char *argv[])
848{
849    uint8_t mac[6];
850
851    printf("Subways starting...\n");
852
853    errval_t err = skb_client_connect();
854    assert(err_is_ok(err));
855
856    err = skb_execute_query("vtd_enabled(0,C), write(vtd_coherency(C)).");
857    if (err_is_ok(err)) {
858        use_vtd = 1;
859        for(int i = 0; i < argc; i++) {
860	    if(!strncmp(argv[i], "use_vtd=", strlen("use_vtd=") - 1)) {
861	      use_vtd = !!atol(argv[i] + strlen("use_vtd="));
862              break;
863            }
864        }
865	err = skb_read_output("vtd_coherency(%d)", &vtd_coherency);
866	assert(err_is_ok(err));
867    }
868
869    for(int i = 0; i < argc; i++) {
870        if(!strncmp(argv[i], "receiver", strlen("receiver") - 1)) {
871            receiver = true;
872        }
873    }
874
875    if (use_vtd) {
876        err = connect_to_acpi();
877	assert(err_is_ok(err));
878	err = vtd_create_domain(cap_vroot);
879	assert(err_is_ok(err));
880	err = vtd_domain_add_device(0, 16, 16, 1, cap_vroot);
881	assert(err_is_ok(err));
882    }
883
884    {
885        // XXX: Interrupts on special waitset
886        static struct waitset int_waitset;
887        waitset_init(&int_waitset);
888        barrelfish_interrupt_waitset = &int_waitset;
889    }
890
891    e1000n_driver_init(argc, argv);
892
893    ether_get_mac_address_ptr(mac);
894    printf("Arranet MAC address %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx\n",
895           mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
896
897    struct capref frame;
898    uint8_t *ram_base = alloc_map_frame(VREGION_FLAGS_READ_WRITE,
899                                        MAX_PACKETS * PACKET_SIZE + 4096,
900                                        &packetring_frame);
901    assert(ram_base != NULL);
902
903    struct frame_identity id;
904    err = invoke_frame_identify(packetring_frame, &id);
905    assert(err_is_ok(err));
906
907    rx_pbase = id.base;
908    rx_vbase = (genvaddr_t)ram_base;
909
910    // Add buffers to RX ring for packet reception
911    for(int i = 0; i < MAX_PACKETS; i++) {
912        struct packet *p = &rx_packets[i];
913
914        // XXX: Use this for recvfrom_arranet to get alignment
915        /* p->payload = ram_base + (i * PACKET_SIZE) + 6; */
916        /* p->pa = id.base + (i * PACKET_SIZE) + 6; */
917        p->payload = ram_base + (i * PACKET_SIZE);
918        p->pa = id.base + (i * PACKET_SIZE);
919        p->len = PACKET_SIZE;
920        p->flags = 0;
921
922        err = rx_register_buffer_fn_ptr(p->pa, p->payload, p);
923        assert(err_is_ok(err));
924    }
925
926    // Allocate TX buffers (to have them all backed by one frame)
927    uint8_t *tx_bufs = alloc_map_frame(VREGION_FLAGS_READ_WRITE,
928                                       MAX_PACKETS * PACKET_SIZE, &frame);
929    assert(tx_bufs != NULL);
930
931    err = invoke_frame_identify(frame, &id);
932    assert(err_is_ok(err));
933    tx_pbase = id.base;
934    tx_vbase = (genvaddr_t)tx_bufs;
935
936    // Initialize TX packet descriptors
937    for(int i = 0; i < MAX_PACKETS; i++) {
938        /* tx_packets[i].payload = tx_bufs[i]; */
939        tx_packets[i].payload = tx_bufs + (i * PACKET_SIZE);
940    }
941
942    if (!vtd_coherency) {// For the UDP echo server
943        sys_debug_flush_cache();
944    }
945
946    // Determine my static IP address
947    for(int i = 0; i < sizeof(ip_config) / sizeof(struct mac2ip); i++) {
948        struct mac2ip *e = &ip_config[i];
949        if(!memcmp(mac, e->mac, ETHARP_HWADDR_LEN)) {
950            arranet_myip = htonl(e->ip);
951            memcpy(arranet_mymac, e->mac, ETHARP_HWADDR_LEN);
952            break;
953        }
954    }
955
956    if(arranet_myip == 0) {
957        USER_PANIC("Arranet: No static IP config for this MAC address!\n");
958    }
959
960    /***** Initialize IP/Ethernet packet header template *****/
961    {
962        struct pkt_ip_headers *p = &packet_ip_header;
963
964        // Initialize Ethernet header
965        memcpy(&p->eth.src, mac, ETHARP_HWADDR_LEN);
966        p->eth.type = htons(ETHTYPE_IP);
967
968        // Initialize IP header
969        p->ip._v_hl = 69;
970        p->ip._tos = 0;
971        p->ip._id = htons(3);
972        p->ip._offset = 0;
973        p->ip._ttl = 0xff;
974        p->ip._proto = 0;
975        p->ip._chksum = 0;
976        p->ip.src.addr = arranet_myip;
977    }
978
979    /***** Initialize UDP/IP/Ethernet packet header template *****/
980    {
981        struct pkt_udp_headers *p = &packet_udp_header;
982
983        // Initialize Ethernet header
984        memcpy(&p->eth.src, mac, ETHARP_HWADDR_LEN);
985        p->eth.type = htons(ETHTYPE_IP);
986
987        // Initialize IP header
988        p->ip._v_hl = 69;
989        p->ip._tos = 0;
990        p->ip._id = htons(3);
991        p->ip._offset = 0;
992        p->ip._ttl = 0xff;
993        p->ip._proto = IP_PROTO_UDP;
994        p->ip._chksum = 0;
995        p->ip.src.addr = arranet_myip;
996
997        // Initialize UDP header
998        p->udp.chksum = 0;
999    }
1000
1001    /***** Initialize TCP/IP/Ethernet packet header template *****/
1002    {
1003        struct pkt_tcp_headers *p = &packet_tcp_header;
1004
1005        // Initialize Ethernet header
1006        memcpy(&p->eth.src, mac, ETHARP_HWADDR_LEN);
1007        p->eth.type = htons(ETHTYPE_IP);
1008
1009        // Initialize IP header
1010        p->ip._v_hl = 69;
1011        p->ip._tos = 0;
1012        p->ip._id = htons(3);
1013        p->ip._offset = 0;
1014        p->ip._ttl = 0xff;
1015        p->ip._proto = IP_PROTO_TCP;
1016        p->ip._chksum = 0;
1017        p->ip.src.addr = arranet_myip;
1018
1019        // Initialize TCP header
1020        p->tcp.chksum = 0;
1021        p->tcp.wnd = 65535;
1022    }
1023
1024    /***** Eat driver-specific options *****/
1025    static char *new_argv[ARG_MAX];
1026    int new_argc = 0;
1027    for(int i = 0; i < argc; i++) {
1028        int j;
1029
1030        for(j = 0; eat_opts[j] != NULL; j++) {
1031            if(!strncmp(argv[i], eat_opts[j], strlen(eat_opts[j]) - 1)) {
1032                // Option matches -- delete!
1033                break;
1034            }
1035        }
1036
1037        if(eat_opts[j] == NULL) {
1038            // Option doesn't match -- keep!
1039            new_argv[new_argc++] = argv[i];
1040        }
1041    }
1042
1043    argc = new_argc;
1044    argv = new_argv;
1045
1046    if(receiver) {
1047        // Export receiver service
1048        errval_t r = subways_export(NULL, subways_export_cb, subways_connect_cb,
1049                                    get_default_waitset(), IDC_EXPORT_FLAGS_DEFAULT);
1050        assert(err_is_ok(r));
1051    } else {
1052        // Create connection to receiver and send it cap to receive ring
1053        iref_t iref;
1054        err = nameservice_blocking_lookup("subways_sender", &iref);
1055        assert(err_is_ok(err));
1056
1057        err = subways_bind(iref, subways_bind_cb, NULL, get_default_waitset(),
1058                           IDC_BIND_FLAGS_DEFAULT);
1059        assert(err_is_ok(err));
1060    }
1061
1062    // Register Arranet receive channel
1063    waitset_chanstate_init(&recv_chanstate, CHANTYPE_LWIP_SOCKET);
1064    err = waitset_chan_register_polled(get_default_waitset(),
1065				       &recv_chanstate,
1066				       MKCLOSURE(do_nothing, NULL));
1067    assert(err_is_ok(err));
1068
1069#ifdef PRINT_UTIL
1070    // Register CPU util printout every second
1071    struct periodic_event ev;
1072    err = periodic_event_create(&ev, get_default_waitset(), PRINT_PERIOD,
1073                                MKCLOSURE(print_cpu_util, NULL));
1074    assert(err_is_ok(err));
1075#endif
1076
1077    /* sys_debug_disable_timer(); */
1078
1079    for(;;) {
1080        event_dispatch(get_default_waitset());
1081    }
1082}
1083