1/*
2 * Copyright (c) 2014, University of Washington.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, CAB F.78, Universitaetstrasse 6, CH-8092 Zurich.
8 * Attn: Systems Group.
9 */
10
11/**
12 * \file
13 * \brief Arranet library code
14 */
15
16#include <stdio.h>
17#include <assert.h>
18#include <barrelfish/barrelfish.h>
19#include <barrelfish/inthandler.h>
20#include <barrelfish/sys_debug.h>
21#include <skb/skb.h>
22#include <sys/socket.h>
23#include <netif/e1000.h>
24#include <limits.h>
25#include <barrelfish/waitset.h>
26#include <barrelfish/waitset_chan.h>
27#include <lwip/sock_chan_support.h>
28#include <netdb.h>
29#include <arranet.h>
30#include <arranet_impl.h>
31#include <acpi_client/acpi_client.h>
32
33#include "inet_chksum.h"
34
35#include <arranet_debug.h>
36
37static ether_terminate_queue ether_terminate_queue_ptr = NULL;
38static ether_get_mac_address_t ether_get_mac_address_ptr = NULL;
39static ether_transmit_pbuf_list_t ether_transmit_pbuf_list_ptr = NULL;
40static ether_get_tx_free_slots tx_free_slots_fn_ptr = NULL;
41static ether_handle_free_TX_slot handle_free_tx_slot_fn_ptr = NULL;
42static ether_rx_register_buffer rx_register_buffer_fn_ptr = NULL;
43static ether_rx_get_free_slots rx_get_free_slots_fn_ptr = NULL;
44
45uint64_t interrupt_counter = 0;
46uint64_t total_rx_p_count = 0;
47uint64_t total_rx_datasize = 0;
48struct client_closure *g_cl = NULL;
49
/* Size in bytes of one packet buffer; also reported as SO_SNDBUF. */
#define PACKET_SIZE     2048

/* Number of entries in each of the RX and TX packet arrays (was 1024). */
#define MAX_PACKETS     2000

/* Capacity of the static ARP table. */
#define MAX_PEERS       256

/* When non-zero, packet_output() skips virtual-to-physical translation. */
static int use_vtd = 0;
static int vtd_coherency = 1;
58
59struct peer {
60    uint32_t ip;
61    struct eth_addr mac;
62};
63
64// Configure static ARP entries here
65// IP addresses are in network byte order!
66static struct peer peers[MAX_PEERS] = {
67    {
68        // XXX: This needs to be updated each time the tap interface is re-initialized
69        .ip = 0x0102000a,       // 10.0.2.1
70        /* .mac.addr = "\x86\x86\x0b\xda\x22\xd7", */
71        .mac.addr = "\x12\x67\xb9\x3e\xe2\x2c",
72    },
73    {
74        // XXX: This needs to be updated each time the tap interface is re-initialized
75        .ip = 0x0164a8c0,       // 192.168.100.1
76        .mac.addr = "\x5e\x93\xf2\xf1\xeb\xfa",
77    },
78    {
79        .ip = 0xaf06d080,       // 128.208.6.175 - swingout2
80        .mac.addr = "\x90\xe2\xba\x3a\x2e\xdd",
81    },
82    {
83        .ip = 0xec06d080,       // 128.208.6.236 - swingout3
84        .mac.addr = "\xa0\x36\x9f\x0f\xfb\xe2",
85    },
86    {
87        .ip = 0x8106d080,       // 128.208.6.129 - swingout4
88        .mac.addr = "\xa0\x36\x9f\x10\x01\x6e",
89    },
90    {
91        .ip = 0x8206d080,       // 128.208.6.130 - swingout5
92        .mac.addr = "\xa0\x36\x9f\x10\x00\xa2",
93    },
94    {
95        .ip = 0xc506d080,       // 128.208.6.197 - swingout6
96        .mac.addr = "\xa0\x36\x9f\x10\x03\x52",
97    },
98};
99static int peers_alloc = 7;             // Set number of static ARP here!
100
#ifdef DEBUG_LATENCIES
/* Debug counter, incremented each time an RX buffer is handed back to the driver. */
static int rx_packets_available = MAX_PACKETS;
#endif
104
105struct socket {
106    struct socket *prev, *next;
107    int type, protocol;
108    int fd;
109    bool passive, nonblocking, connected, hangup, shutdown;
110    struct sockaddr_in bound_addr;
111    struct sockaddr_in peer_addr;
112    uint32_t my_seq, peer_seq, next_ack;
113};
114
115struct pkt_ip_headers {
116    struct eth_hdr eth;
117    struct ip_hdr ip;
118} __attribute__ ((packed));
119
120struct pkt_udp_headers {
121    struct eth_hdr eth;
122    struct ip_hdr ip;
123    struct udp_hdr udp;
124} __attribute__ ((packed));
125
126struct pkt_tcp_headers {
127    struct eth_hdr eth;
128    struct ip_hdr ip;
129    struct tcp_hdr tcp;
130} __attribute__ ((packed));
131
132// All known connections and those in progress
133static struct socket *connections = NULL;
134
135static struct socket sockets[MAX_FD];
136static struct packet rx_packets[MAX_PACKETS];
137
138// XXX: Needs to be per socket later on
139static struct waitset_chanstate recv_chanstate;
140static struct waitset_chanstate send_chanstate;
141
142static struct packet *inpkt = NULL;
143
#ifdef DEBUG_LATENCIES
/* General debug counters */
static size_t memcache_packets_received = 0;
static size_t output_pipeline_stalled = 0;
static size_t port_cnt[65536];

/* Time until packet was delivered to network interface */
static int lwip_send_time[POSIX_TRANSA];
static size_t lwip_send_transactions = 0;

/* Time until packet at exit of recvfrom */
int posix_recv_time[POSIX_TRANSA];
size_t posix_recv_transactions = 0;

/* Time until packet at entry to sendto */
static int posix_send_time[POSIX_TRANSA];
static size_t posix_send_transactions = 0;

/*
 * Latency samples for up to 20 measurement points, each with its own
 * sample count. Points 0 and 1 are recorded in sock_recved_udp_packet(),
 * point 6 in packet_output().
 */
int memcache_times[20][POSIX_TRANSA];
size_t memcache_transactions[20];

/* Hash statistics (not updated in this part of the file) */
size_t hash_option1 = 0;
size_t hash_option2 = 0;
size_t hash_option3 = 0;
size_t hash_length = 0;
size_t hash_calls = 0;
size_t hash_aligned = 0;
size_t hash_unaligned = 0;
#endif
166
/* Per-protocol "accepted" flags; all start out false. */
static bool arranet_udp_accepted = false;
static bool arranet_tcp_accepted = false;
static bool arranet_raw_accepted = false;

/* Local TCP port range (an earlier configuration started at 0xc000). */
#define TCP_LOCAL_PORT_RANGE_START        8081
#define TCP_LOCAL_PORT_RANGE_END          0xffff

/*
 * Ring buffer of free local TCP ports, with one slot per port in the range.
 * free_tcp_tail is the index of the most recently queued entry and
 * free_tcp_free is the number of entries currently queued; the ring starts
 * out marked as completely full.
 */
static uint16_t free_tcp_ports[TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START + 1];
static uint16_t free_tcp_tail = TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START;
static uint16_t free_tcp_free = TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START + 1;
178
#ifdef SENDMSG_WITH_COPY
/* Index of the oldest entry in the free_tcp_ports ring. */
static uint16_t free_tcp_head = 0;

/**
 * \brief Take the oldest port out of the free-port ring.
 *
 * \return The dequeued port (per the original note: in network byte order),
 *         or 0 after printing a message when the ring is empty.
 */
static uint16_t tcp_new_port(void)
{
    if (free_tcp_free == 0) {
        printf("No more free ports!\n");
        return 0;
    }

    const uint16_t port = free_tcp_ports[free_tcp_head];

    free_tcp_head = (free_tcp_head + 1)
        % (TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START + 1);
    free_tcp_free--;

    return port;
}
#endif
197
198static void tcp_free_port(uint16_t port)
199{
200    /* if(pcb->local_port == 8080) { */
201    /*     return; */
202    /* } */
203    /* if(pcb->local_port == 8080) { */
204    /*     printf("Freeing 8080 from %p %p %p\n", */
205    /*            __builtin_return_address(0), */
206    /*            __builtin_return_address(1), */
207    /*            __builtin_return_address(2)); */
208    /* } */
209    /* assert(pcb->local_port != 8080); */
210    assert(free_tcp_free < TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START + 1);
211
212    /* printf("Freeing port %d\n", pcb->local_port); */
213
214    /* for(int i = 0; i < free_tcp_free; i++) { */
215    /*     u16_t entry = free_tcp_ports[(i + free_tcp_head) % (TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START + 1)]; */
216    /*     assert(entry != pcb->local_port); */
217    /* } */
218
219    free_tcp_free++;
220    free_tcp_tail = (free_tcp_tail + 1) % (TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START + 1);
221    free_tcp_ports[free_tcp_tail] = port;
222}
223
224static struct socket *free_sockets_queue[MAX_FD];
225static int free_sockets_head = 0, free_sockets_tail = MAX_FD - 1,
226    free_sockets = MAX_FD;
227
228static struct socket *alloc_socket(void)
229{
230    if(free_sockets == 0) {
231        return NULL;
232    }
233
234    free_sockets--;
235    struct socket *new_socket = free_sockets_queue[free_sockets_head];
236    // Reset all fields except FD
237    int fd_save = new_socket->fd;
238    uint32_t seq_save = new_socket->my_seq;
239    memset(new_socket, 0, sizeof(struct socket));
240    new_socket->fd = fd_save;
241    new_socket->my_seq = seq_save + 1000;
242    free_sockets_head = (free_sockets_head + 1) % MAX_FD;
243    /* printf("alloc_socket: returned %p\n", new_socket); */
244    return new_socket;
245}
246
247static void free_socket(struct socket *sock)
248{
249    /* printf("free_socket: %p\n", sock); */
250    assert(sock != NULL);
251    assert(free_sockets < MAX_FD);
252    free_sockets++;
253    free_sockets_tail = (free_sockets_tail + 1) % MAX_FD;
254    free_sockets_queue[free_sockets_tail] = sock;
255}
256
257/******** IP config *********/
258
259struct mac2ip {
260    uint8_t mac[ETHARP_HWADDR_LEN];
261    uint32_t ip;
262};
263
264static struct mac2ip ip_config[] = {
265    {   // QEMU
266        .mac = "\x52\x54\x00\x12\x34\x56",
267        /* .ip = 0x0a00020f,       // 10.0.2.15 */
268        .ip = 0xc0a8640f,       // 192.168.100.15
269    },
270    {
271        // QEMU2
272        .mac = "\x52\x54\x00\x12\x34\x57",
273        .ip = 0xc0a80102,       // 192.168.1.2
274    },
275    {   // swingout1 (and swingout1-vf0)
276        .mac = "\xa0\x36\x9f\x10\x00\xa6",
277        .ip = 0x80d00643,       // 128.208.6.67
278    },
279    {   // swingout1-vf1
280        .mac = "\x22\xc9\xfc\x96\x83\xfc",
281        .ip = 0x80d00644,       // 128.208.6.68
282    },
283    {   // swingout1-vf2
284        .mac = "\xce\x43\x5b\xf7\x3e\x60",
285        .ip = 0x80d00602,       // 128.208.6.2
286    },
287    {   // swingout1-vf3
288        .mac = "\x6a\xb0\x62\xf6\xa7\x21",
289        .ip = 0x80d00603,       // 128.208.6.3
290    },
291    {   // swingout1-vf4
292        .mac = "\xb2\xdf\xf9\x39\xc6\x10",
293        .ip = 0x80d00604,       // 128.208.6.4
294    },
295    {   // swingout1-vf5
296        .mac = "\x92\x77\xe7\x3f\x80\x30",
297        .ip = 0x80d0060c,       // 128.208.6.12
298    },
299    {   // swingout5
300        .mac = "\xa0\x36\x9f\x10\x00\xa2",
301        .ip = 0x80d00682,       // 128.208.6.130
302    },
303};
304
305static uint8_t arranet_mymac[ETHARP_HWADDR_LEN];
306static uint32_t arranet_myip = 0;
307
/**
 * \brief read() on a socket: forwards to lwip_recv() with no flags.
 *
 * \param s    Socket descriptor.
 * \param mem  Destination buffer.
 * \param len  Size of the destination buffer in bytes.
 *
 * \return Whatever lwip_recv() returns.
 */
int lwip_read(int s, void *mem, size_t len)
{
    const int no_flags = 0;

    return lwip_recv(s, mem, len, no_flags);
}
312
/**
 * \brief write() on a socket: forwards to lwip_send() with no flags.
 *
 * \param s     Socket descriptor.
 * \param data  Bytes to send.
 * \param size  Number of bytes to send.
 *
 * \return Whatever lwip_send() returns.
 */
int lwip_write(int s, const void *data, size_t size)
{
    const int no_flags = 0;

    return lwip_send(s, data, size, no_flags);
}
317
318int lwip_fcntl(int s, int cmd, int val)
319{
320    struct socket *sock = &sockets[s];
321    int retval = 0;
322
323    switch(cmd) {
324    case F_GETFL:
325        retval = sock->nonblocking ? O_NONBLOCK : 0;
326        break;
327
328    case F_SETFL:
329        sock->nonblocking = val & O_NONBLOCK ? true : false;
330        break;
331
332    default:
333        assert(!"NYI");
334        retval = -1;
335        break;
336    }
337
338    return retval;
339}
340
341int lwip_listen(int s, int backlog)
342{
343    struct socket *sock = &sockets[s];
344    sock->passive = true;
345    return 0;
346}
347
348int lwip_getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
349{
350    int retval = 0;
351
352    switch(level) {
353    case SOL_SOCKET:
354        switch(optname) {
355        case SO_SNDBUF:
356            {
357                assert(*optlen >= sizeof(int));
358                int *ret = optval;
359                *ret = PACKET_SIZE;
360                *optlen = sizeof(int);
361            }
362            break;
363
364        case SO_ERROR:
365            {
366                assert(*optlen >= sizeof(int));
367                int *ret = optval;
368                struct socket *sock = &sockets[s];
369                assert(sock != NULL);
370                *ret = sock->connected ? 0 : EINPROGRESS;
371                *optlen = sizeof(int);
372            }
373            break;
374
375        default:
376            assert(!"NYI");
377            retval = -1;
378            break;
379        }
380        break;
381
382    default:
383        assert(!"NYI");
384        retval = -1;
385        break;
386    }
387
388    return retval;
389}
390
391int lwip_setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen)
392{
393    int retval = 0;
394
395    switch(level) {
396    case SOL_SOCKET:
397        switch(optname) {
398        case SO_REUSEADDR:
399        case SO_REUSEPORT:
400            // No-op
401            break;
402
403        case SO_SNDBUF:
404            {
405                int len = *(const int *)optval;
406                if(len > PACKET_SIZE) {
407                    retval = -1;
408                }
409            }
410            break;
411
412        default:
413            printf("%d, %d\n", level, optname);
414            assert(!"NYI");
415            retval = -1;
416            break;
417        }
418        break;
419
420    case IPPROTO_TCP:
421        switch(optname) {
422        case TCP_NODELAY:
423            // XXX: No-op. We don't support Nagling anyway.
424            break;
425        }
426        break;
427
428    default:
429        assert(!"NYI");
430        retval = -1;
431        break;
432    }
433
434    return retval;
435}
436
437int lwip_getsockname(int s, struct sockaddr *name, socklen_t *namelen)
438{
439    struct socket *sock = &sockets[s];
440    assert(sock != NULL);
441    assert(*namelen >= sizeof(struct sockaddr_in));
442
443    memcpy(name, &sock->bound_addr, sizeof(struct sockaddr_in));
444    *namelen = sizeof(struct sockaddr_in);
445
446    return 0;
447}
448
/**
 * \brief Dummy getaddrinfo(): always yields one passive AF_INET wildcard address.
 *
 * No name resolution is performed. The result is a single addrinfo whose
 * address is INADDR_ANY with the port parsed from servname.
 *
 * \param nodename  Ignored.
 * \param servname  Decimal port number; NULL selects port 0.
 * \param hints     Optional. Supplies the socket type (SOCK_DGRAM if unset
 *                  or zero) and the protocol (0 if unset).
 * \param res       Receives the result; release it with lwip_freeaddrinfo().
 *
 * \return 0 on success, non-zero if memory could not be allocated
 *         (EAI_MEMORY where the header provides it, -1 otherwise).
 */
int lwip_getaddrinfo(const char *nodename, const char *servname,
                     const struct addrinfo *hints, struct addrinfo **res)
{
    (void)nodename;

    struct addrinfo *r = calloc(1, sizeof(*r));
    struct sockaddr_in *sa = calloc(1, sizeof(*sa));

    // Do not dereference a failed allocation; free(NULL) is a no-op
    if (r == NULL || sa == NULL) {
        free(sa);
        free(r);
#ifdef EAI_MEMORY
        return EAI_MEMORY;
#else
        return -1;
#endif
    }

    sa->sin_family = AF_INET;
    // atoi(NULL) is undefined, so a missing service name means port 0
    sa->sin_port = htons(servname != NULL ? atoi(servname) : 0);
    sa->sin_addr.s_addr = INADDR_ANY;

    // Return dummy socket address; UDP unless the hints say otherwise
    r->ai_flags = AI_PASSIVE;
    r->ai_family = AF_INET;
    if (hints != NULL && hints->ai_socktype != 0) {
        r->ai_socktype = hints->ai_socktype;
    } else {
        r->ai_socktype = SOCK_DGRAM;
    }
    r->ai_protocol = (hints != NULL) ? hints->ai_protocol : 0;
    r->ai_addrlen = sizeof(struct sockaddr_in);
    r->ai_addr = (struct sockaddr *)sa;
    r->ai_canonname = NULL;
    r->ai_next = NULL;

    *res = r;
    return 0;
}
478
479void lwip_freeaddrinfo(struct addrinfo *ai)
480{
481    for(struct addrinfo *i = ai; i != NULL;) {
482        struct addrinfo *oldi = i;
483        free(i->ai_addr);
484        i = i->ai_next;
485        free(oldi);
486    }
487}
488
/* The following 3 are #defined in lwIP 1.4.1, but not in 1.3.1, so they are duplicated here */
490
491char *inet_ntoa(struct in_addr addr)
492{
493    return ipaddr_ntoa((ip_addr_t *)&addr);
494}
495
496int inet_aton(const char *cp, struct in_addr *addr)
497{
498    return ipaddr_aton(cp, (ip_addr_t *)addr);
499}
500
501u32_t inet_addr(const char *cp)
502{
503    return ipaddr_addr(cp);
504}
505
506/***** lwIP-compatibility functions, so that NFS and RPC code compiles *****/
507
508u8_t pbuf_free_tagged(struct pbuf *p, const char *func_name, int line_no)
509{
510    assert(!"NYI");
511    return 0;
512}
513
514u8_t pbuf_header(struct pbuf *p, s16_t header_size_increment)
515{
516    assert(!"NYI");
517    return 0;
518}
519
520struct udp_pcb;
521
522err_t udp_send(struct udp_pcb *pcb, struct pbuf *p);
523err_t udp_send(struct udp_pcb *pcb, struct pbuf *p)
524{
525    assert(!"NYI");
526    return 0;
527}
528
struct udp_pcb *udp_new(void);

/**
 * \brief Unimplemented lwIP compatibility stub.
 *
 * Asserts ("NYI") when called; returns NULL if assertions are compiled out.
 */
struct udp_pcb *udp_new(void)
{
    assert(!"NYI");
    return NULL;
}
535
536void udp_recv(struct udp_pcb *pcb,
537              void (*recvfn) (void *arg, struct udp_pcb * upcb,
538                            struct pbuf * p,
539                            struct ip_addr * addr,
540                            u16_t port), void *recv_arg);
541void udp_recv(struct udp_pcb *pcb,
542              void (*recvfn) (void *arg, struct udp_pcb * upcb,
543                            struct pbuf * p,
544                            struct ip_addr * addr,
545                            u16_t port), void *recv_arg)
546{
547    assert(!"NYI");
548}
549
550void udp_remove(struct udp_pcb *pcb);
551void udp_remove(struct udp_pcb *pcb)
552{
553    assert(!"NYI");
554    return 0;
555}
556
557err_t udp_connect(struct udp_pcb *pcb, ip_addr_t *ipaddr, u16_t port);
558err_t udp_connect(struct udp_pcb *pcb, ip_addr_t *ipaddr, u16_t port)
559{
560    assert(!"NYI");
561    return 0;
562}
563
564struct pbuf *pbuf_alloc_tagged(pbuf_layer layer, u16_t length, pbuf_type type, const char *func_name, int line_no)
565{
566    assert(!"NYI");
567    return 0;
568}
569
void lwip_record_event_simple(uint8_t event_type, uint64_t ts);

/**
 * \brief Unimplemented lwIP compatibility stub.
 *
 * Asserts ("NYI") when called; does nothing if assertions are compiled out.
 */
void lwip_record_event_simple(uint8_t event_type, uint64_t ts)
{
    (void)event_type;
    (void)ts;

    assert(!"NYI");
}
575
uint64_t wrapper_perform_lwip_work(void);

/**
 * \brief Unimplemented lwIP compatibility stub.
 *
 * Asserts ("NYI") when called; returns 0 if assertions are compiled out.
 */
uint64_t wrapper_perform_lwip_work(void)
{
    const uint64_t nothing_done = 0;

    assert(!"NYI");
    return nothing_done;
}
582
bool lwip_init_auto(void);

/**
 * \brief Unimplemented lwIP compatibility stub.
 *
 * Asserts ("NYI") when called; returns false if assertions are compiled out.
 */
bool lwip_init_auto(void)
{
    assert(!"NYI");
    return false;
}
589
590/******** NYI *********/
591
/* lwIP's mutex and waitset symbols; both are left NULL here. */
struct waitset *lwip_waitset = NULL;
struct thread_mutex *lwip_mutex = NULL;
594
/**
 * \brief Placeholder for lwIP's global lock: deliberately does nothing.
 */
void lwip_mutex_lock(void)
{
    /* intentionally empty */
}
598
/**
 * \brief Placeholder for lwIP's global unlock: deliberately does nothing.
 */
void lwip_mutex_unlock(void)
{
    /* intentionally empty */
}
602
/**
 * \brief Unimplemented: host name lookup is not supported.
 *
 * Asserts ("NYI") when called. If assertions are compiled out, the lookup
 * reports failure instead of running off the end of the function.
 *
 * \param name  Host name (ignored).
 *
 * \return Always NULL.
 */
struct hostent *lwip_gethostbyname(const char *name)
{
    (void)name;

    assert(!"NYI");
    return NULL;
}
607
/**
 * \brief Unimplemented: querying the peer address is not supported.
 *
 * Asserts ("NYI") when called. If assertions are compiled out, the call
 * reports failure instead of running off the end of the function.
 *
 * \param s        Socket descriptor (ignored).
 * \param name     Not written.
 * \param namelen  Not written.
 *
 * \return Always -1.
 */
int lwip_getpeername(int s, struct sockaddr *name, socklen_t *namelen)
{
    (void)s;
    (void)name;
    (void)namelen;

    assert(!"NYI");
    return -1;
}
612
613/******** NYI END *********/
614
615void ethernetif_backend_init(char *service_name, uint64_t queueid,
616                             ether_get_mac_address_t get_mac_ptr,
617                             ether_terminate_queue terminate_queue_ptr,
618                             ether_transmit_pbuf_list_t transmit_ptr,
619                             ether_get_tx_free_slots tx_free_slots_ptr,
620                             ether_handle_free_TX_slot handle_free_tx_slot_ptr,
621                             size_t rx_bufsz,
622                             ether_rx_register_buffer rx_register_buffer_ptr,
623                             ether_rx_get_free_slots rx_get_free_slots_ptr)
624{
625    ether_terminate_queue_ptr = terminate_queue_ptr;
626    ether_get_mac_address_ptr = get_mac_ptr;
627    ether_transmit_pbuf_list_ptr = transmit_ptr;
628    tx_free_slots_fn_ptr = tx_free_slots_ptr;
629    handle_free_tx_slot_fn_ptr = handle_free_tx_slot_ptr;
630    rx_register_buffer_fn_ptr = rx_register_buffer_ptr;
631    rx_get_free_slots_fn_ptr = rx_get_free_slots_ptr;
632    /* printf("PBUF_POOL_BUFSIZE = %u, rx buffer size = %zu\n", PBUF_POOL_BUFSIZE, */
633    /*        rx_bufsz); */
634}
635
636#define MAX_DRIVER_BUFS         16
637
638static genpaddr_t rx_pbase = 0, tx_pbase = 0;
639static genvaddr_t rx_vbase = 0, tx_vbase = 0;
640
641static struct packet tx_packets[MAX_PACKETS];
642/* static uint8_t tx_bufs[MAX_PACKETS][PACKET_SIZE]; */
643static unsigned int tx_idx = 0;
644/* static ssize_t tx_packets_available = MAX_PACKETS; */
645
646#include <barrelfish/deferred.h>
647
648static void packet_output(struct packet *p)
649{
650    struct driver_buffer bufs[MAX_DRIVER_BUFS];
651    int n = 0;
652
653#ifdef DEBUG_LATENCIES
654    if(memcache_transactions[6] < POSIX_TRANSA) {
655        if(p->next == NULL) {
656            assert(p->next == NULL && p->len >= sizeof(protocol_binary_request_no_extras));
657            protocol_binary_request_no_extras *mypayload = (void *)p->payload + SIZEOF_ETH_HDR + 20 + sizeof(struct udp_hdr) + UDP_HEADLEN;
658            memcache_times[6][memcache_transactions[6]] = get_time() - mypayload->message.header.request.opaque;
659            memcache_transactions[6]++;
660        } else {
661            protocol_binary_request_no_extras *mypayload = (void *)p->next->payload + UDP_HEADLEN;
662            memcache_times[6][memcache_transactions[6]] = get_time() - mypayload->message.header.request.opaque;
663            memcache_transactions[6]++;
664        }
665    }
666#endif
667
668    for (struct packet *q = p; q != NULL; q = q->next) {
669        struct driver_buffer *buf = &bufs[n];
670
671        /* if(q->payload < &tx_bufs[0][0] || q->payload >= &tx_bufs[MAX_PACKETS][PACKET_SIZE]) { */
672        /*     printf("Called from %p %p\n", */
673        /*            __builtin_return_address(0), */
674        /*            __builtin_return_address(1)); */
675        /*     assert(q->payload >= &tx_bufs[0][0] && q->payload < &tx_bufs[MAX_PACKETS][PACKET_SIZE]); */
676        /* } */
677
678        /* Send the data from the pbuf to the interface, one pbuf at a
679           time. The size of the data in each pbuf is kept in the ->len
680           variable. */
681        assert(q->len > 0);
682
683        // Check if it's from the RX region
684        /* printf("RX region: Comparing %p against [%p:%p]\n", */
685        /*        q->payload, */
686        /*        (void *)rx_vbase, */
687        /*        (void *)(rx_vbase + (MAX_PACKETS * PACKET_SIZE + 4096))); */
688	if (!use_vtd) {
689            if(((genvaddr_t)q->payload) >= rx_vbase &&
690               ((genvaddr_t)q->payload) < rx_vbase + (MAX_PACKETS * PACKET_SIZE + 4096)) {
691                buf->pa = rx_pbase + ((genvaddr_t)q->payload - rx_vbase);
692            } else if(((genvaddr_t)q->payload) >= tx_vbase &&
693                      ((genvaddr_t)q->payload) < tx_vbase + (MAX_PACKETS * PACKET_SIZE)) {
694                // It is from the TX region!
695                buf->pa = tx_pbase + ((genvaddr_t)q->payload - tx_vbase);
696            } else {
697                // Check if it's in morecore's region
698                struct morecore_state *mc_state = get_morecore_state();
699                struct vspace_mmu_aware *mmu_state = &mc_state->mmu_state;
700                genvaddr_t base = vregion_get_base_addr(&mmu_state->vregion);
701                struct memobj_frame_list *i;
702
703                // Walk frame list
704                for(i = mmu_state->memobj.frame_list; i != NULL; i = i->next) {
705                    // If address is completely within frame, we can resolve
706                    // XXX: Everything else would be easier with an IOMMU
707		    /* printf("Heap: Comparing [%p:%p] against [%p:%p]\n", */
708		    /*        q->payload, q->payload + q->len, */
709		    /*        (void *)(base + i->offset), */
710		    /*        (void *)(base + i->offset + i->size)); */
711                    if(base + i->offset <= (genvaddr_t)q->payload &&
712                       ((genvaddr_t)q->payload) + q->len < base + i->offset + i->size) {
713                        assert(i->pa != 0);
714
715                        /* buf->pa = id.base + ((genvaddr_t)q->payload - base - i->offset); */
716                        buf->pa = i->pa + ((genvaddr_t)q->payload - base - i->offset);
717                        break;
718                    }
719                }
720
721                if(i == NULL) {
722                    // Check if it's in text/data region
723                    int entry;
724                    for(entry = 0; entry < mc_state->v2p_entries; entry++) {
725                        struct v2pmap *pmap = &mc_state->v2p_mappings[entry];
726
727                        // If address is completely within frame, we can resolve
728                        // XXX: Everything else would be easier with an IOMMU
729                        /* printf("BSS: Comparing [%p:%p] against [%p:%p]\n", */
730                        /*        q->payload, q->payload + q->len, */
731                        /*        (void *)(pmap->va), */
732                        /*        (void *)(pmap->va + pmap->size)); */
733                        if(pmap->va <= (genvaddr_t)q->payload &&
734                                ((genvaddr_t)q->payload) + q->len < pmap->va + pmap->size) {
735                            buf->pa = pmap->pa + ((genvaddr_t)q->payload - pmap->va);
736                            break;
737                        }
738                    }
739
740                    if(entry == mc_state->v2p_entries) {
741                        printf("Called from %p %p\n",
742                                __builtin_return_address(0),
743                                __builtin_return_address(1),
744                                __builtin_return_address(2));
745
746                        USER_PANIC("Invalid pbuf! payload = %p, pa = %p, subpacket = %d\n",
747                                   q->payload, buf->pa, n);
748                    }
749                }
750            }
751        }
752
753        /* printf("Sending: '%s'\n", (char *)q->payload); */
754
755        buf->va = q->payload;
756        buf->len = q->len;
757#ifndef SENDMSG_WITH_COPY
758        buf->opaque = q->opaque;
759#else
760        buf->opaque = q;
761#endif
762        buf->flags = q->flags;
763
764        n++;
765    }
766
767#ifdef DEBUG_LATENCIES
768    if(lwip_send_transactions < POSIX_TRANSA) {
769        struct ip_hdr *iphdr = (struct ip_hdr *)(p->payload + SIZEOF_ETH_HDR);
770
771        if(IPH_PROTO(iphdr) == IP_PROTO_UDP) {
772            struct udp_hdr *udphdr = (struct udp_hdr *)(p->payload + SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4));
773            if(htons(udphdr->src) == 11212 || htons(udphdr->src) == 11211) {
774                protocol_binary_response_no_extras *mypayload;
775                if(p->next != NULL) {
776                    mypayload = (void *)p->next->next->payload;
777                } else {
778                    mypayload = (void *)p->payload + sizeof(struct pkt_udp_headers) + UDP_HEADLEN;
779                }
780                lwip_send_time[lwip_send_transactions] = get_time() - mypayload->message.header.response.opaque;
781                lwip_send_transactions++;
782            } else if (htons(udphdr->src) == 1234) {
783                protocol_binary_request_no_extras *mypayload;
784                if(p->next == NULL) {
785                    mypayload = (void *)p->payload + sizeof(struct pkt_udp_headers) + UDP_HEADLEN;
786                } else {
787                    mypayload = (void *)p->next->payload + UDP_HEADLEN;
788                }
789                lwip_send_time[lwip_send_transactions] = get_time() - mypayload->message.header.request.opaque;
790                lwip_send_transactions++;
791            }
792        }
793    }
794#endif
795
796    errval_t err = ether_transmit_pbuf_list_ptr(bufs, n);
797    assert(err_is_ok(err));
798}
799
800void arranet_recv_free(struct packet *p)
801{
802    assert(p >= rx_packets && p < &rx_packets[MAX_PACKETS]);
803
804#ifdef DEBUG_LATENCIES
805    rx_packets_available++;
806#endif
807    errval_t err = rx_register_buffer_fn_ptr(p->pa, p->payload, p);
808    assert(err_is_ok(err));
809}
810
/* Arguments and results of one UDP receive, handled by sock_recved_udp_packet(). */
struct recv_udp_args {
    void *buf;                  /* copy destination; with len == 0, a void ** that receives the payload pointer */
    size_t len;                 /* capacity of buf; 0 selects the zero-copy path */
    int recv_len;               /* out: number of payload bytes delivered */
    struct sockaddr *src_addr;  /* optional out: sender address */
    socklen_t *addrlen;         /* in/out: size of src_addr */
    struct packet **inpkt;      /* out (zero-copy path only): the packet holding the payload */
};

/* Arguments and results of one TCP receive or accept, handled by sock_recved_tcp_packet(). */
struct recv_tcp_args {
    void *buf;                  /* copy destination; NULL when called on behalf of accept() */
    size_t len;                 /* capacity of buf */
    int recv_len;               /* out: number of payload bytes delivered */
    struct sockaddr *src_addr;  /* optional out: sender address */
    socklen_t *addrlen;         /* in/out: size of src_addr */
    struct packet **inpkt;      /* out: packet holding the payload (zero-copy path) */
    bool syn;                   /* out: the segment carried SYN */
    bool for_me;                /* out: false if the packet was left unconsumed */
    uint32_t in_seqno;          /* out: sequence number, copied verbatim from the TCP header */
    struct socket *sock;        /* in: socket on whose behalf recv() was called */
};

/* Arguments and results of one raw receive, handled by sock_recved_raw_packet(). */
struct recv_raw_args {
    void *buf;                  /* copy destination */
    size_t len;                 /* capacity of buf; must not be 0 */
    int recv_len;               /* out: number of bytes delivered */
    struct sockaddr *src_addr;  /* optional out: sender address */
    socklen_t *addrlen;         /* in/out: size of src_addr */
    /* struct packet **inpkt; */
};
840
841static void sock_recved_udp_packet(void *arg)
842{
843    struct recv_udp_args *args = arg;
844    assert(inpkt != NULL);
845    assert(inpkt->next == NULL);
846
847    // Process headers
848    struct ip_hdr *iphdr = (struct ip_hdr *)(inpkt->payload + SIZEOF_ETH_HDR);
849
850    assert(IPH_PROTO(iphdr) == IP_PROTO_UDP);
851
852    struct udp_hdr *udphdr = (struct udp_hdr *)(inpkt->payload + SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4));
853    size_t hdr_len = SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4) + sizeof(struct udp_hdr);
854    uint8_t *payload = inpkt->payload + hdr_len;
855    uint16_t pkt_len = htons(udphdr->len) - sizeof(struct udp_hdr);
856    assert(args->buf != NULL);      // No accept() allowed
857
858    // Fill in src_addr if provided
859    if(args->src_addr != NULL) {
860        struct sockaddr_in *addr = (struct sockaddr_in *)args->src_addr;
861
862        assert(*args->addrlen >= sizeof(struct sockaddr_in));
863        memset(addr, 0, sizeof(struct sockaddr_in));
864        addr->sin_len = sizeof(struct sockaddr_in);
865        addr->sin_family = AF_INET;
866        addr->sin_port = udphdr->src;
867        addr->sin_addr.s_addr = iphdr->src.addr;
868        *args->addrlen = sizeof(struct sockaddr_in);
869    }
870
871    // It's a recvfrom!
872    if(args->len != 0) {
873#ifdef DEBUG_LATENCIES
874    if(memcache_transactions[0] < POSIX_TRANSA) {
875        protocol_binary_request_no_extras *mypayload = (void *)payload + UDP_HEADLEN;
876        memcache_times[0][memcache_transactions[0]] = get_time() - mypayload->message.header.request.opaque;
877        memcache_transactions[0]++;
878    }
879#endif
880
881        args->recv_len = MIN(args->len, pkt_len);
882        memcpy(args->buf, payload, args->recv_len);
883
884#ifdef DEBUG_LATENCIES
885    if(memcache_transactions[1] < POSIX_TRANSA) {
886        protocol_binary_request_no_extras *mypayload = (void *)payload + UDP_HEADLEN;
887        memcache_times[1][memcache_transactions[1]] = get_time() - mypayload->message.header.request.opaque;
888        memcache_transactions[1]++;
889    }
890#endif
891
892#ifdef DEBUG_LATENCIES
893        rx_packets_available++;
894#endif
895        errval_t err = rx_register_buffer_fn_ptr(inpkt->pa, inpkt->payload, inpkt);
896        assert(err_is_ok(err));
897    } else {
898        args->recv_len = pkt_len;
899        *((void **)args->buf) = payload;
900        *args->inpkt = inpkt;
901    }
902
903    // Input packet is consumed in stack
904    inpkt = NULL;
905}
906
907static void sock_recved_raw_packet(void *arg)
908{
909    struct recv_raw_args *args = arg;
910    assert(inpkt != NULL);
911    assert(inpkt->next == NULL);
912
913    // Process headers
914    struct ip_hdr *iphdr = (struct ip_hdr *)(inpkt->payload + SIZEOF_ETH_HDR);
915    assert(args->buf != NULL);      // No accept() allowed
916    uint16_t pkt_len = ntohs(IPH_LEN(iphdr));
917    uint8_t *payload = (void *)iphdr;
918
919    // Fill in src_addr if provided
920    if(args->src_addr != NULL) {
921        struct sockaddr_in *addr = (struct sockaddr_in *)args->src_addr;
922
923        assert(*args->addrlen >= sizeof(struct sockaddr_in));
924        memset(addr, 0, sizeof(struct sockaddr_in));
925        addr->sin_len = sizeof(struct sockaddr_in);
926        addr->sin_family = AF_INET;
927        addr->sin_port = 0;
928        addr->sin_addr.s_addr = iphdr->src.addr;
929        *args->addrlen = sizeof(struct sockaddr_in);
930    }
931
932    // It's a recvfrom!
933    assert(args->len != 0);
934    args->recv_len = MIN(args->len, pkt_len);
935    memcpy(args->buf, payload, args->recv_len);
936    errval_t err = rx_register_buffer_fn_ptr(inpkt->pa, inpkt->payload, inpkt);
937    assert(err_is_ok(err));
938
939    // Input packet is consumed in stack
940    inpkt = NULL;
941}
942
943static void sock_recved_tcp_packet(void *arg)
944{
945    struct recv_tcp_args *args = arg;
946
947    // Received only a FIN?
948    if(inpkt == NULL) {
949        args->recv_len = 0;
950        args->for_me = true;
951        return;
952    }
953    assert(inpkt != NULL);
954    assert(inpkt->next == NULL);
955
956    // Process headers
957    struct ip_hdr *iphdr = (struct ip_hdr *)(inpkt->payload + SIZEOF_ETH_HDR);
958    assert(IPH_PROTO(iphdr) == IP_PROTO_TCP);
959    struct tcp_hdr *tcphdr = (struct tcp_hdr *)(inpkt->payload + SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4));
960    size_t hdr_len = SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4) + (TCPH_HDRLEN(tcphdr) * 4);
961    uint8_t *payload = inpkt->payload + hdr_len;
962    uint16_t pkt_len = htons(IPH_LEN(iphdr)) - (TCPH_HDRLEN(tcphdr) * 4) - (IPH_HL(iphdr) * 4);
963
964    args->in_seqno = tcphdr->seqno;
965    args->for_me = true;
966
967    // Is this from an accept() call?
968    if(args->buf == NULL) {
969        if(TCPH_FLAGS(tcphdr) & TCP_SYN) {
970            args->syn = true;
971        } else {
972            // Don't consume packet
973            args->for_me = false;
974            return;
975        }
976    } else {    // From a recv() call
977        if(TCPH_FLAGS(tcphdr) & TCP_SYN) {
978            // Don't consume packet
979            args->syn = true;
980            args->for_me = false;
981            return;
982        } else {
983            assert(args->sock != NULL);
984            // Is this for the socket that's calling?
985            if(tcphdr->dest != args->sock->bound_addr.sin_port ||
986               tcphdr->src != args->sock->peer_addr.sin_port) {
987                // Don't consume packet
988                args->for_me = false;
989                return;
990            }
991
992            if(args->len != 0) {
993                assert(args->len >= pkt_len);
994                args->recv_len = MIN(args->len, pkt_len);
995                memcpy(args->buf, payload, args->recv_len);
996            } else {
997                assert(!"NYI");
998                args->recv_len = pkt_len;
999                *((void **)args->buf) = payload;
1000                *args->inpkt = inpkt;
1001            }
1002        }
1003    }
1004
1005    // Fill in src_addr if provided
1006    if(args->src_addr != NULL) {
1007        struct sockaddr_in *addr = (struct sockaddr_in *)args->src_addr;
1008
1009        assert(*args->addrlen >= sizeof(struct sockaddr_in));
1010        memset(addr, 0, sizeof(struct sockaddr_in));
1011        addr->sin_len = sizeof(struct sockaddr_in);
1012        addr->sin_family = AF_INET;
1013        addr->sin_port = tcphdr->src;
1014        addr->sin_addr.s_addr = iphdr->src.addr;
1015        *args->addrlen = sizeof(struct sockaddr_in);
1016    }
1017
1018#ifdef DEBUG_LATENCIES
1019    rx_packets_available++;
1020#endif
1021    errval_t err = rx_register_buffer_fn_ptr(inpkt->pa, inpkt->payload, inpkt);
1022    assert(err_is_ok(err));
1023
1024    // Input packet is consumed in stack
1025    inpkt = NULL;
1026}
1027
1028int lwip_recv(int s, void *mem, size_t len, int flags)
1029{
1030    /* printf("lwip_recv(%d)\n", s); */
1031    assert(arranet_tcp_accepted);
1032    struct socket *sock = &sockets[s];
1033    struct recv_tcp_args args = {
1034        .buf = mem,
1035        .len = len,
1036        .src_addr = NULL,
1037        .syn = false,
1038        .sock = sock,
1039    };
1040    struct waitset ws;
1041    waitset_init(&ws);
1042
1043    errval_t err = waitset_chan_register_polled(&ws, &recv_chanstate,
1044                                                MKCLOSURE(sock_recved_tcp_packet, &args));
1045    assert(err_is_ok(err));
1046
1047    /* if socket is ready, trigger event right away */
1048    if (lwip_sock_ready_read(s)) {
1049        err = waitset_chan_trigger(&recv_chanstate);
1050        assert(err_is_ok(err));
1051    }
1052
1053    if(sock->nonblocking) {
1054        err = event_dispatch_non_block(&ws);
1055        if(err_no(err) == LIB_ERR_NO_EVENT || !args.for_me) {
1056            err = waitset_chan_deregister(&recv_chanstate);
1057            assert(err_is_ok(err) ||
1058                   (err_no(err) == LIB_ERR_CHAN_NOT_REGISTERED && !args.for_me));
1059            errno = EAGAIN;
1060            args.recv_len = -1;
1061        } else {
1062            errno = 0;
1063            assert(err_is_ok(err));
1064        }
1065    } else {
1066        err = event_dispatch(&ws);
1067        assert(err_is_ok(err));
1068        if(args.syn) {
1069            assert(!"Will block forever");
1070        }
1071        errno = 0;
1072    }
1073
1074    if(errno != EAGAIN) {
1075        sock->peer_seq = htonl(args.in_seqno);
1076        sock->next_ack = sock->peer_seq + args.recv_len;
1077        /* printf("lwip_recv: Assigning %p, %x\n", sock, sock->next_ack); */
1078    } else {
1079        // Did it shutdown?
1080        if(sock->hangup) {
1081            errno = 0;
1082            args.recv_len = 0;
1083        }
1084    }
1085
1086#ifdef DEBUG_LATENCIES
1087    if(posix_recv_transactions < POSIX_TRANSA) {
1088        protocol_binary_request_no_extras *mypayload = mem + UDP_HEADLEN;
1089        posix_recv_time[posix_recv_transactions] = get_time() - mypayload->message.header.request.opaque;
1090        posix_recv_transactions++;
1091    }
1092#endif
1093
1094    // Packet is now in buffer
1095    /* printf("lwip_recv returned %d\n", args.recv_len); */
1096    return args.recv_len;
1097}
1098
/**
 * \brief Send a single buffer to an explicit destination.
 *
 * Wraps the buffer in a one-element I/O vector and forwards the request to
 * lwip_sendmsg().
 *
 * \param s      Socket index.
 * \param data   Payload to send.
 * \param size   Payload length in bytes.
 * \param flags  Passed through to lwip_sendmsg().
 * \param to     Destination address.
 * \param tolen  Length of 'to'.
 *
 * \return Whatever lwip_sendmsg() returns.
 */
int lwip_sendto(int s, const void *data, size_t size, int flags,
                const struct sockaddr *to, socklen_t tolen)
{
    struct iovec vec = { 0 };
    struct msghdr hdr = { 0 };

    vec.iov_base = (void *)data;
    vec.iov_len = size;

    hdr.msg_name = (void *)to;
    hdr.msg_namelen = tolen;
    hdr.msg_iov = &vec;
    hdr.msg_iovlen = 1;
    hdr.msg_flags = 0;

    return lwip_sendmsg(s, &hdr, flags);
}
1117
1118int lwip_socket(int domain, int type, int protocol)
1119{
1120    // XXX: Accept UDP or TCP, based on created sockets
1121    switch(type) {
1122    case SOCK_STREAM:
1123        assert(!arranet_udp_accepted);
1124        arranet_tcp_accepted = true;
1125        break;
1126
1127    case SOCK_DGRAM:
1128        assert(!arranet_tcp_accepted);
1129        arranet_udp_accepted = true;
1130        break;
1131
1132    case SOCK_RAW:
1133      assert(!arranet_tcp_accepted && !arranet_udp_accepted);
1134      assert(protocol == IPPROTO_TCP);
1135      arranet_raw_accepted = true;
1136      break;
1137    }
1138
1139    struct socket *sock = alloc_socket();
1140    assert(sock != NULL);
1141    sock->type = type;
1142    sock->protocol = protocol;
1143    /* printf("lwip_socket() = %d\n", sock->fd); */
1144    return sock->fd;
1145}
1146
1147int lwip_bind(int s, const struct sockaddr *name, socklen_t namelen)
1148{
1149    struct socket *sock = &sockets[s];
1150    assert(name->sa_family == AF_INET);
1151    assert(namelen >= sizeof(struct sockaddr_in));
1152    sock->bound_addr = *(struct sockaddr_in *)name;
1153    return 0;
1154}
1155
1156int lwip_recvfrom(int sockfd, void *buf, size_t len, int flags,
1157                  struct sockaddr *src_addr, socklen_t *addrlen)
1158{
1159    assert(arranet_udp_accepted || arranet_raw_accepted);
1160    struct socket *sock = &sockets[sockfd];
1161    struct waitset ws;
1162    waitset_init(&ws);
1163    int *recv_len;
1164    errval_t err;
1165    struct recv_udp_args udp_args;
1166    struct recv_raw_args raw_args;
1167
1168    switch(sock->type) {
1169    case SOCK_DGRAM:
1170      {
1171	  udp_args.buf = buf;
1172	  udp_args.len = len;
1173	  udp_args.src_addr = src_addr;
1174	  udp_args.addrlen = addrlen;
1175
1176	  err = waitset_chan_register_polled(&ws, &recv_chanstate,
1177					     MKCLOSURE(sock_recved_udp_packet, &udp_args));
1178	  assert(err_is_ok(err));
1179
1180	  recv_len = &udp_args.recv_len;
1181      }
1182      break;
1183
1184    case SOCK_RAW:
1185      {
1186	  raw_args.buf = buf;
1187	  raw_args.len = len;
1188	  raw_args.src_addr = src_addr;
1189	  raw_args.addrlen = addrlen;
1190
1191	  err = waitset_chan_register_polled(&ws, &recv_chanstate,
1192					     MKCLOSURE(sock_recved_raw_packet, &raw_args));
1193	  assert(err_is_ok(err));
1194
1195	  recv_len = &raw_args.recv_len;
1196      }
1197      break;
1198
1199    default:
1200        assert(!"NYI");
1201        break;
1202    }
1203
1204    assert(err_is_ok(err));
1205
1206    /* if socket is ready, trigger event right away */
1207    if (lwip_sock_ready_read(sockfd)) {
1208        err = waitset_chan_trigger(&recv_chanstate);
1209        assert(err_is_ok(err));
1210    }
1211
1212    if(sock->nonblocking) {
1213        err = event_dispatch_non_block(&ws);
1214        if(err_no(err) == LIB_ERR_NO_EVENT) {
1215            err = waitset_chan_deregister(&recv_chanstate);
1216            assert(err_is_ok(err));
1217            errno = EAGAIN;
1218            *recv_len = -1;
1219        } else {
1220            assert(err_is_ok(err));
1221        }
1222    } else {
1223        err = event_dispatch(&ws);
1224        assert(err_is_ok(err));
1225    }
1226
1227/* #ifdef DEBUG_LATENCIES */
1228/*     if(posix_recv_transactions < POSIX_TRANSA) { */
1229/*         protocol_binary_request_no_extras *mypayload = buf + UDP_HEADLEN; */
1230/*         posix_recv_time[posix_recv_transactions] = get_time() - mypayload->message.header.request.opaque; */
1231/*         posix_recv_transactions++; */
1232/*     } */
1233/* #endif */
1234
1235    // Packet is now in buffer
1236    return *recv_len;
1237}
1238
1239int recvfrom_arranet(int sockfd, void **buf, struct packet **p,
1240                     struct sockaddr *src_addr, socklen_t *addrlen)
1241{
1242    assert(arranet_udp_accepted);
1243    struct fdtab_entry *e = fdtab_get(sockfd);
1244    struct socket *sock = &sockets[e->fd];
1245    struct recv_udp_args args = {
1246        .buf = buf,
1247        .len = 0,
1248        .src_addr = src_addr,
1249        .addrlen = addrlen,
1250        .inpkt = p,
1251        .recv_len = 0,
1252    };
1253    struct waitset ws;
1254    waitset_init(&ws);
1255
1256    errval_t err = waitset_chan_register_polled(&ws, &recv_chanstate,
1257                                                MKCLOSURE(sock_recved_udp_packet, &args));
1258    assert(err_is_ok(err));
1259
1260    /* if socket is ready, trigger event right away */
1261    if (lwip_sock_ready_read(e->fd)) {
1262        err = waitset_chan_trigger(&recv_chanstate);
1263        assert(err_is_ok(err));
1264    }
1265
1266    if(sock->nonblocking) {
1267        err = event_dispatch_non_block(&ws);
1268        if(err_no(err) == LIB_ERR_NO_EVENT) {
1269            err = waitset_chan_deregister(&recv_chanstate);
1270            assert(err_is_ok(err));
1271            errno = EAGAIN;
1272            args.recv_len = -1;
1273        } else {
1274            assert(err_is_ok(err));
1275        }
1276    } else {
1277        err = event_dispatch(&ws);
1278        assert(err_is_ok(err));
1279    }
1280
1281/* #ifdef DEBUG_LATENCIES */
1282/*     if(posix_recv_transactions < POSIX_TRANSA) { */
1283/*         protocol_binary_request_no_extras *mypayload = (*buf) + UDP_HEADLEN; */
1284/*         posix_recv_time[posix_recv_transactions] = get_time() - mypayload->message.header.request.opaque; */
1285/*         posix_recv_transactions++; */
1286/*     } */
1287/* #endif */
1288
1289    // XXX: Assert dword alignment
1290    assert(((long)*buf) % 8 == 0);
1291
1292    // Packet is now in buffer
1293    return args.recv_len;
1294}
1295
// Header templates for outgoing packets. The send paths in this file copy
// the matching template in front of the payload and then patch the
// per-packet fields (destination MAC/IP, ports, lengths, checksums).
// NOTE(review): presumably filled in during stack initialisation — the
// code that populates them is not in this part of the file.
static struct pkt_ip_headers packet_ip_header;
static struct pkt_udp_headers packet_udp_header;
static struct pkt_tcp_headers packet_tcp_header;
1299
1300static struct peer *peers_get_from_ip(uint32_t ip)
1301{
1302    for(int i = 0; i < MAX_PEERS; i++) {
1303        if(ip == peers[i].ip) {
1304            return &peers[i];
1305        }
1306    }
1307
1308    /* printf("NOT FOUND: %x\n", ip); */
1309
1310    return NULL;
1311}
1312
1313static struct peer *peers_get_next_free(void)
1314{
1315    if(peers_alloc < MAX_PEERS) {
1316        return &peers[peers_alloc++];
1317    } else {
1318        return NULL;
1319    }
1320}
1321
// Bound on the number of I/O vector entries in one sendmsg call: the send
// paths assert msg_iovlen < MAX_SENDMSG, and sendmsg_arranet() sizes its
// on-stack packet array with it.
#define MAX_SENDMSG     16
1323
1324int sendmsg_arranet(int sockfd, const struct msghdr *msg)
1325{
1326    assert(arranet_udp_accepted);
1327    struct fdtab_entry *e = fdtab_get(sockfd);
1328    struct socket *sock = &sockets[e->fd];
1329    ssize_t short_size = 0;
1330    struct packet packets[MAX_SENDMSG];
1331    struct packet hdrpkt;
1332    struct packet *oldp = NULL;
1333
1334#ifdef DEBUG_LATENCIES
1335    if(posix_send_transactions < POSIX_TRANSA) {
1336        if(msg->msg_iovlen > 1 && msg->msg_iov[1].iov_len == sizeof(protocol_binary_response_no_extras)) {
1337            protocol_binary_response_no_extras *mypayload = msg->msg_iov[1].iov_base;
1338            posix_send_time[posix_send_transactions] = get_time() - mypayload->message.header.response.opaque;
1339            posix_send_transactions++;
1340        } else if(msg->msg_iov[0].iov_len >= sizeof(protocol_binary_request_no_extras)) {
1341            protocol_binary_request_no_extras *mypayload = msg->msg_iov[0].iov_base;
1342            posix_send_time[posix_send_transactions] = get_time() - mypayload->message.header.request.opaque;
1343            posix_send_transactions++;
1344        }
1345    }
1346#endif
1347
1348    assert(msg->msg_iovlen < MAX_SENDMSG);
1349
1350    for(int i = 0; i < msg->msg_iovlen; i++) {
1351        struct packet *newp = &packets[i];
1352
1353        newp->payload = (uint8_t *)msg->msg_iov[i].iov_base;
1354        newp->len = msg->msg_iov[i].iov_len;
1355        newp->next = NULL;
1356        newp->flags = 0;
1357        if(oldp != NULL) {
1358            oldp->next = newp;
1359        }
1360        short_size += msg->msg_iov[i].iov_len;
1361        oldp = newp;
1362    }
1363
1364    // Slap UDP/IP/Ethernet headers in front
1365    struct pkt_udp_headers myhdr = packet_udp_header;
1366    hdrpkt.payload = (uint8_t *)&myhdr;
1367    struct pkt_udp_headers *p = (struct pkt_udp_headers *)hdrpkt.payload;
1368    hdrpkt.len = sizeof(struct pkt_udp_headers);
1369    hdrpkt.next = packets;
1370
1371    // Fine-tune headers
1372    assert(msg->msg_name != NULL);
1373    struct sockaddr_in *saddr = msg->msg_name;
1374    assert(saddr->sin_family == AF_INET);
1375    p->ip.dest.addr = saddr->sin_addr.s_addr;
1376    p->udp.dest = saddr->sin_port;
1377    struct peer *peer = peers_get_from_ip(p->ip.dest.addr);
1378    p->eth.dest = peer->mac;
1379    assert(sock->bound_addr.sin_port != 0);
1380    p->udp.src = sock->bound_addr.sin_port;
1381    p->udp.len = htons(short_size + sizeof(struct udp_hdr));
1382    p->ip._len = htons(short_size + sizeof(struct udp_hdr) + IP_HLEN);
1383#ifdef CONFIG_QEMU_NETWORK
1384    p->ip._chksum = inet_chksum(&p->ip, IP_HLEN);
1385    hdrpkt.flags = 0;
1386#else
1387    // Hardware IP header checksumming on
1388    p->ip._chksum = 0;
1389    hdrpkt.flags = NETIF_TXFLAG_IPCHECKSUM;
1390#endif
1391
1392    packet_output(&hdrpkt);
1393
1394    return short_size;
1395}
1396
1397static struct packet *get_tx_packet(void)
1398{
1399    struct packet *p = &tx_packets[tx_idx];
1400
1401    // Busy-wait until packet not in flight
1402    while(p->len != 0) {
1403#ifdef DEBUG_LATENCIES
1404        output_pipeline_stalled++;
1405#endif
1406        /* printf("Pipeline stalled! tx_packets_available = %zd\n", tx_packets_available); */
1407        handle_free_tx_slot_fn_ptr();
1408        /* if(!handle_free_tx_slot_fn_ptr()) { */
1409        /*     printf("No packets could be freed!\n"); */
1410        /* } */
1411    }
1412
1413    /* tx_packets_available--; */
1414
1415    tx_idx = (tx_idx + 1) % MAX_PACKETS;
1416    return p;
1417}
1418
1419int lwip_shutdown(int s, int how)
1420{
1421    assert(arranet_tcp_accepted);
1422    struct socket *sock = &sockets[s];
1423    assert(sock->nonblocking);
1424
1425    /* printf("lwip_shutdown(%d)\n", s); */
1426
1427    if(how == SHUT_RD || sock->shutdown) {
1428        return 0;
1429    }
1430
1431    sock->shutdown = true;
1432
1433#ifdef SENDMSG_WITH_COPY
1434    // Get new TX packet and send FIN-ACK
1435    struct packet *newp = get_tx_packet();
1436    newp->len = sizeof(struct pkt_tcp_headers);
1437    newp->next = NULL;
1438
1439    // Slap TCP/IP/Ethernet headers in front
1440    memcpy(newp->payload, &packet_tcp_header, sizeof(struct pkt_tcp_headers));
1441
1442    // Fine-tune headers
1443    struct pkt_tcp_headers *p = (struct pkt_tcp_headers *)newp->payload;
1444    p->ip.dest.addr = sock->peer_addr.sin_addr.s_addr;
1445    p->tcp.dest = sock->peer_addr.sin_port;
1446    struct peer *peer = peers_get_from_ip(p->ip.dest.addr);
1447    p->eth.dest = peer->mac;
1448    assert(sock->bound_addr.sin_port != 0);
1449    p->tcp.src = sock->bound_addr.sin_port;
1450    p->ip._len = htons(sizeof(struct tcp_hdr) + IP_HLEN);
1451    p->tcp.seqno = htonl(sock->my_seq++);
1452    p->tcp.ackno = htonl(sock->next_ack);
1453    /* printf("lwip_shutdown: Sending %p, seq %x, ack %x\n", sock, sock->my_seq - 1, sock->next_ack); */
1454    TCPH_FLAGS_SET(&p->tcp, TCP_FIN | TCP_ACK); // Set FIN-ACK
1455    TCPH_HDRLEN_SET(&p->tcp, 5);   // 20 / 4
1456    p->tcp.wnd = htons(11680);
1457#ifdef CONFIG_QEMU_NETWORK
1458    p->ip._chksum = inet_chksum(&p->ip, IP_HLEN);
1459    p->tcp.chksum = 0;
1460    newp->payload = (uint8_t *)&p->tcp;
1461    newp->len -= (uint8_t *)&p->tcp - (uint8_t *)p;
1462    p->tcp.chksum = inet_chksum_pseudo(newp, (ip_addr_t *)&p->ip.src, (ip_addr_t *)&p->ip.dest,
1463                                       IP_PROTO_TCP, TCP_HLEN);
1464    newp->payload = (uint8_t *)p;
1465    newp->len = sizeof(struct pkt_tcp_headers);
1466    newp->flags = 0;
1467#else
1468    // Hardware IP/TCP header checksumming on
1469    p->ip._chksum = 0;
1470    p->tcp.chksum =
1471        (~inet_chksum_pseudo_partial(newp, (ip_addr_t *)&p->ip.src, (ip_addr_t *)&p->ip.dest,
1472                                     IP_PROTO_TCP, TCP_HLEN, 0)) & 0xffff;
1473    newp->flags = (NETIF_TXFLAG_IPCHECKSUM | NETIF_TXFLAG_TCPCHECKSUM) |
1474        (TCPH_HDRLEN(&p->tcp) << NETIF_TXFLAG_TCPHDRLEN_SHIFT);
1475#endif
1476
1477    packet_output(newp);
1478
1479    return 0;
1480#else
1481    assert(!"NYI");
1482#endif
1483}
1484
1485int lwip_close(int s)
1486{
1487  if(arranet_udp_accepted) {
1488    // XXX: Ignore for now
1489    return 0;
1490  }
1491
1492    assert(arranet_tcp_accepted);
1493    struct socket *sock = &sockets[s];
1494
1495    lwip_shutdown(s, SHUT_RDWR);
1496
1497    // Might need to return port if it was bound
1498    if(sock->bound_addr.sin_port != 0 && htons(sock->bound_addr.sin_port) != 8080) {
1499        tcp_free_port(sock->bound_addr.sin_port);
1500    }
1501
1502    // Remove from active connections
1503    if(sock->prev != NULL) {
1504        sock->prev->next = sock->next;
1505    }
1506    if(sock->next != NULL) {
1507        sock->next->prev = sock->prev;
1508    }
1509    if(connections == sock) {
1510        connections = sock->next;
1511    }
1512    sock->next = sock->prev = NULL;
1513
1514    free_socket(sock);
1515    return 0;
1516}
1517
1518int lwip_send(int s, const void *data, size_t size, int flags)
1519{
1520    assert(arranet_tcp_accepted);
1521    struct socket *sock = &sockets[s];
1522    assert(sock->nonblocking);
1523    assert(size + sizeof(struct pkt_tcp_headers) <= 1500);
1524
1525    /* printf("lwip_send(%d, , %zu)\n", s, size); */
1526
1527#ifdef SENDMSG_WITH_COPY
1528    // Get new TX packet and copy data into it
1529    struct packet *newp = get_tx_packet();
1530    newp->len = sizeof(struct pkt_tcp_headers) + size;
1531    newp->next = NULL;
1532    uint8_t *buf = newp->payload + sizeof(struct pkt_tcp_headers);
1533    memcpy(buf, data, size);
1534
1535    // Slap TCP/IP/Ethernet headers in front
1536    memcpy(newp->payload, &packet_tcp_header, sizeof(struct pkt_tcp_headers));
1537
1538    // Fine-tune headers
1539    struct pkt_tcp_headers *p = (struct pkt_tcp_headers *)newp->payload;
1540    p->ip.dest.addr = sock->peer_addr.sin_addr.s_addr;
1541    p->tcp.dest = sock->peer_addr.sin_port;
1542    struct peer *peer = peers_get_from_ip(p->ip.dest.addr);
1543    assert(peer != NULL);
1544    p->eth.dest = peer->mac;
1545    assert(sock->bound_addr.sin_port != 0);
1546    p->tcp.src = sock->bound_addr.sin_port;
1547    p->ip._len = htons(sizeof(struct tcp_hdr) + IP_HLEN + size);
1548    p->tcp.seqno = htonl(sock->my_seq);
1549    sock->my_seq += size;
1550    /* printf("lwip_send: Assigning %p, seq %x\n", sock, sock->my_seq); */
1551    p->tcp.ackno = htonl(sock->next_ack);
1552    /* printf("lwip_send: Sending %p, %x\n", sock, sock->next_ack); */
1553    TCPH_FLAGS_SET(&p->tcp, TCP_ACK | TCP_PSH);
1554    TCPH_HDRLEN_SET(&p->tcp, 5);   // 20 / 4
1555    p->tcp.wnd = htons(11680);
1556#ifdef CONFIG_QEMU_NETWORK
1557    p->ip._chksum = inet_chksum(&p->ip, IP_HLEN);
1558    p->tcp.chksum = 0;
1559    newp->payload = (uint8_t *)&p->tcp;
1560    newp->len -= (uint8_t *)&p->tcp - (uint8_t *)p;
1561    p->tcp.chksum = inet_chksum_pseudo(newp, (ip_addr_t *)&p->ip.src, (ip_addr_t *)&p->ip.dest,
1562                                       IP_PROTO_TCP, TCP_HLEN + size);
1563    newp->payload = (uint8_t *)p;
1564    newp->len = sizeof(struct pkt_tcp_headers) + size;
1565    newp->flags = 0;
1566#else
1567    // Hardware IP/TCP header checksumming on
1568    p->ip._chksum = 0;
1569    p->tcp.chksum =
1570        (~inet_chksum_pseudo_partial(newp, (ip_addr_t *)&p->ip.src, (ip_addr_t *)&p->ip.dest,
1571                                     IP_PROTO_TCP, TCP_HLEN + size, 0)) & 0xffff;
1572    newp->flags = (NETIF_TXFLAG_IPCHECKSUM | NETIF_TXFLAG_TCPCHECKSUM) |
1573        (TCPH_HDRLEN(&p->tcp) << NETIF_TXFLAG_TCPHDRLEN_SHIFT);
1574#endif
1575
1576    packet_output(newp);
1577
1578    return size;
1579#else
1580    assert(!"NYI");
1581#endif
1582}
1583
1584int lwip_connect(int s, const struct sockaddr *name, socklen_t namelen)
1585{
1586    /* printf("lwip_connect(%d)\n", s); */
1587    assert(arranet_tcp_accepted);
1588    struct socket *sock = &sockets[s];
1589    assert(sock->nonblocking);
1590    assert(namelen == sizeof(struct sockaddr_in));
1591    struct sockaddr_in *sa = (struct sockaddr_in *)name;
1592    assert(sa->sin_family == AF_INET);
1593
1594    // Store peer address on socket
1595    sock->peer_addr = *sa;
1596
1597#ifdef SENDMSG_WITH_COPY
1598    // Get new TX packet and send SYN
1599    struct packet *newp = get_tx_packet();
1600    newp->len = sizeof(struct pkt_tcp_headers) + 6;
1601    newp->next = NULL;
1602
1603    // Slap TCP/IP/Ethernet headers in front
1604    memcpy(newp->payload, &packet_tcp_header, sizeof(struct pkt_tcp_headers));
1605
1606    // Fine-tune headers
1607    struct pkt_tcp_headers *p = (struct pkt_tcp_headers *)newp->payload;
1608    uint32_t *payload = (void *)p + sizeof(struct pkt_tcp_headers);
1609    memset(payload, 0, 6);
1610    p->ip.dest.addr = sa->sin_addr.s_addr;
1611    p->tcp.dest = sa->sin_port;
1612    struct peer *peer = peers_get_from_ip(p->ip.dest.addr);
1613    assert(peer != NULL);
1614    p->eth.dest = peer->mac;
1615    assert(sock->bound_addr.sin_port == 0);
1616    sock->bound_addr.sin_port = tcp_new_port();
1617    p->tcp.src = sock->bound_addr.sin_port;
1618    p->ip._len = htons(sizeof(struct tcp_hdr) + IP_HLEN + 4);
1619    p->tcp.seqno = htonl(++sock->my_seq); sock->my_seq++;
1620    /* printf("lwip_connect: Assigning %p seq %x\n", sock, sock->my_seq); */
1621    p->tcp.ackno = 0;
1622    TCPH_FLAGS_SET(&p->tcp, TCP_SYN);
1623    TCPH_HDRLEN_SET(&p->tcp, 6);   // 24 / 4
1624    p->tcp.wnd = htons(11680);
1625    *payload = TCP_BUILD_MSS_OPTION(1460);
1626#ifdef CONFIG_QEMU_NETWORK
1627    p->ip._chksum = inet_chksum(&p->ip, IP_HLEN);
1628    p->tcp.chksum = 0;
1629    newp->payload = (uint8_t *)&p->tcp;
1630    newp->len -= (uint8_t *)&p->tcp - (uint8_t *)p;
1631    p->tcp.chksum = inet_chksum_pseudo(newp, (ip_addr_t *)&p->ip.src, (ip_addr_t *)&p->ip.dest,
1632                                       IP_PROTO_TCP, TCP_HLEN + 4);
1633    newp->payload = (uint8_t *)p;
1634    newp->len = sizeof(struct pkt_tcp_headers) + 6;
1635    newp->flags = 0;
1636#else
1637    // Hardware IP/TCP header checksumming on
1638    p->ip._chksum = 0;
1639    p->tcp.chksum = 0;
1640    p->tcp.chksum =
1641        (~inet_chksum_pseudo_partial(newp, (ip_addr_t *)&p->ip.src, (ip_addr_t *)&p->ip.dest,
1642                                     IP_PROTO_TCP, TCP_HLEN + 4, 0)) & 0xffff;
1643    newp->flags = (NETIF_TXFLAG_IPCHECKSUM | NETIF_TXFLAG_TCPCHECKSUM) |
1644        (TCPH_HDRLEN(&p->tcp) << NETIF_TXFLAG_TCPHDRLEN_SHIFT);
1645#endif
1646
1647    packet_output(newp);
1648
1649    assert(sock->prev == NULL && sock->next == NULL);
1650    sock->next = connections;
1651    if(connections != NULL) {
1652        assert(connections->prev == NULL);
1653        connections->prev = sock;
1654    }
1655    sock->prev = NULL;
1656    connections = sock;
1657
1658    errno = EINPROGRESS;
1659    return -1;
1660#else
1661    assert(!"NYI");
1662#endif
1663}
1664
1665#ifdef SENDMSG_WITH_COPY
1666
1667int lwip_sendmsg(int sockfd, const struct msghdr *msg, int flags)
1668{
1669    assert(arranet_udp_accepted || arranet_raw_accepted);
1670    struct socket *sock = &sockets[sockfd];
1671
1672#ifdef DEBUG_LATENCIES
1673    if(posix_send_transactions < POSIX_TRANSA) {
1674        if(msg->msg_iovlen > 1 && msg->msg_iov[1].iov_len == sizeof(protocol_binary_response_no_extras)) {
1675            protocol_binary_response_no_extras *mypayload = msg->msg_iov[1].iov_base;
1676            posix_send_time[posix_send_transactions] = get_time() - mypayload->message.header.response.opaque;
1677            posix_send_transactions++;
1678        } else if(msg->msg_iov[0].iov_len >= sizeof(protocol_binary_request_no_extras)) {
1679            protocol_binary_request_no_extras *mypayload = msg->msg_iov[0].iov_base + UDP_HEADLEN;
1680            posix_send_time[posix_send_transactions] = get_time() - mypayload->message.header.request.opaque;
1681            posix_send_transactions++;
1682        }
1683    }
1684#endif
1685
1686    assert(msg->msg_iovlen < MAX_SENDMSG);
1687
1688    // Determine length of sendmsg vector
1689    ssize_t short_size = 0;
1690    for(int i = 0; i < msg->msg_iovlen; i++) {
1691        short_size += msg->msg_iov[i].iov_len;
1692    }
1693    assert(short_size <= PACKET_SIZE);
1694
1695/* #ifdef DEBUG_LATENCIES */
1696/*     if(memcache_transactions[0] < POSIX_TRANSA) { */
1697/*         if(msg->msg_iovlen > 1 && msg->msg_iov[1].iov_len == sizeof(protocol_binary_response_no_extras)) { */
1698/*             protocol_binary_response_no_extras *mypayload = msg->msg_iov[1].iov_base; */
1699/*             memcache_times[0][memcache_transactions[0]] = get_time() - mypayload->message.header.response.opaque; */
1700/*             memcache_transactions[0]++; */
1701/*         } else if(msg->msg_iov[0].iov_len >= sizeof(protocol_binary_request_no_extras)) { */
1702/*             protocol_binary_request_no_extras *mypayload = msg->msg_iov[0].iov_base + UDP_HEADLEN; */
1703/*             memcache_times[0][memcache_transactions[0]] = get_time() - mypayload->message.header.request.opaque; */
1704/*             memcache_transactions[0]++; */
1705/*         } */
1706/*     } */
1707/* #endif */
1708
1709    // Get new TX packet and copy data into it
1710    struct packet *newp = get_tx_packet();
1711    uint8_t *buf = newp->payload;
1712    size_t pos;
1713    if(sock->type == SOCK_DGRAM) {
1714      pos = sizeof(struct pkt_udp_headers);
1715    } else {
1716      assert(sock->type == SOCK_RAW);
1717      pos = sizeof(struct pkt_ip_headers);
1718    }
1719
1720/* #ifdef DEBUG_LATENCIES */
1721/*     if(memcache_transactions[1] < POSIX_TRANSA) { */
1722/*         if(msg->msg_iovlen > 1 && msg->msg_iov[1].iov_len == sizeof(protocol_binary_response_no_extras)) { */
1723/*             protocol_binary_response_no_extras *mypayload = msg->msg_iov[1].iov_base; */
1724/*             memcache_times[1][memcache_transactions[1]] = get_time() - mypayload->message.header.response.opaque; */
1725/*             memcache_transactions[1]++; */
1726/*         } else if(msg->msg_iov[0].iov_len >= sizeof(protocol_binary_request_no_extras)) { */
1727/*             protocol_binary_request_no_extras *mypayload = msg->msg_iov[0].iov_base + UDP_HEADLEN; */
1728/*             memcache_times[1][memcache_transactions[1]] = get_time() - mypayload->message.header.request.opaque; */
1729/*             memcache_transactions[1]++; */
1730/*         } */
1731/*     } */
1732
1733/*     uint64_t last = rdpmc(0); */
1734/* #endif */
1735
1736    //    assert(msg->msg_iovlen == 1);
1737    for(int i = 0; i < msg->msg_iovlen; i++) {
1738        /* assert((uintptr_t)(&buf[pos]) % 8 == 0); */
1739        //        assert((uintptr_t)msg->msg_iov[i].iov_base % 8 == 0);
1740        memcpy(&buf[pos], msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len);
1741        pos += msg->msg_iov[i].iov_len;
1742    }
1743
1744#ifdef DEBUG_LATENCIES
1745    /* uint64_t now = rdpmc(0); */
1746
1747    /* if(memcache_transactions[19] < POSIX_TRANSA) {   // ZZZ 19 */
1748    /*     memcache_times[19][memcache_transactions[19]] = now - last; */
1749    /*     memcache_transactions[19]++; */
1750    /* } */
1751
1752    if(memcache_transactions[2] < POSIX_TRANSA) {
1753        if(msg->msg_iovlen > 1 && msg->msg_iov[1].iov_len == sizeof(protocol_binary_response_no_extras)) {
1754            protocol_binary_response_no_extras *mypayload = msg->msg_iov[1].iov_base;
1755            memcache_times[2][memcache_transactions[2]] = get_time() - mypayload->message.header.response.opaque;
1756            memcache_transactions[2]++;
1757        } else if(msg->msg_iov[0].iov_len >= sizeof(protocol_binary_request_no_extras)) {
1758            protocol_binary_request_no_extras *mypayload = msg->msg_iov[0].iov_base + UDP_HEADLEN;
1759            memcache_times[2][memcache_transactions[2]] = get_time() - mypayload->message.header.request.opaque;
1760            memcache_transactions[2]++;
1761        }
1762    }
1763#endif
1764
1765    if(sock->type == SOCK_DGRAM) {
1766        newp->len = short_size + sizeof(struct pkt_udp_headers);
1767
1768        // Slap UDP/IP/Ethernet headers in front
1769        memcpy(buf, &packet_udp_header, sizeof(struct pkt_udp_headers));
1770    } else {
1771        assert(sock->type == SOCK_RAW);
1772        newp->len = short_size + sizeof(struct pkt_ip_headers);
1773        // Slap IP/Ethernet headers in front
1774        memcpy(buf, &packet_ip_header, sizeof(struct pkt_ip_headers));
1775    }
1776    newp->next = NULL;
1777
1778/* #ifdef DEBUG_LATENCIES */
1779/*     if(memcache_transactions[3] < POSIX_TRANSA) { */
1780/*         if(msg->msg_iovlen > 1 && msg->msg_iov[1].iov_len == sizeof(protocol_binary_response_no_extras)) { */
1781/*             protocol_binary_response_no_extras *mypayload = msg->msg_iov[1].iov_base; */
1782/*             memcache_times[3][memcache_transactions[3]] = get_time() - mypayload->message.header.response.opaque; */
1783/*             memcache_transactions[3]++; */
1784/*         } else if(msg->msg_iov[0].iov_len >= sizeof(protocol_binary_request_no_extras)) { */
1785/*             protocol_binary_request_no_extras *mypayload = msg->msg_iov[0].iov_base + UDP_HEADLEN; */
1786/*             memcache_times[3][memcache_transactions[3]] = get_time() - mypayload->message.header.request.opaque; */
1787/*             memcache_transactions[3]++; */
1788/*         } */
1789/*     } */
1790/* #endif */
1791
1792    if (sock->type = SOCK_DGRAM) {
1793        // Fine-tune headers
1794        struct pkt_udp_headers *p = (struct pkt_udp_headers *)buf;
1795        assert(msg->msg_name != NULL);
1796        struct sockaddr_in *saddr = msg->msg_name;
1797        assert(saddr->sin_family == AF_INET);
1798        p->ip.dest.addr = saddr->sin_addr.s_addr;
1799        p->udp.dest = saddr->sin_port;
1800        struct peer *peer = peers_get_from_ip(p->ip.dest.addr);
1801        p->eth.dest = peer->mac;
1802        assert(sock->bound_addr.sin_port != 0);
1803        p->udp.src = sock->bound_addr.sin_port;
1804        p->udp.len = htons(short_size + sizeof(struct udp_hdr));
1805        p->ip._len = htons(short_size + sizeof(struct udp_hdr) + IP_HLEN);
1806#ifdef CONFIG_QEMU_NETWORK
1807        p->ip._chksum = inet_chksum(&p->ip, IP_HLEN);
1808        newp->flags = 0;
1809#else
1810        // Hardware IP header checksumming on
1811        p->ip._chksum = 0;
1812        newp->flags = NETIF_TXFLAG_IPCHECKSUM;
1813#endif
1814    } else {
1815      assert(sock->type == SOCK_RAW);
1816      // Fine-tune headers
1817      struct pkt_ip_headers *p = (struct pkt_ip_headers *)buf;
1818      assert(msg->msg_name != NULL);
1819      struct sockaddr_in *saddr = msg->msg_name;
1820      assert(saddr->sin_family == AF_INET);
1821      p->ip.dest.addr = saddr->sin_addr.s_addr;
1822      struct peer *peer = peers_get_from_ip(p->ip.dest.addr);
1823      assert(peer != NULL);
1824      p->eth.dest = peer->mac;
1825      p->ip._len = htons(short_size + IP_HLEN);
1826      IPH_PROTO_SET(&p->ip, sock->protocol);
1827#ifdef CONFIG_QEMU_NETWORK
1828      p->ip._chksum = inet_chksum(&p->ip, IP_HLEN);
1829      newp->flags = 0;
1830#else
1831      // Hardware IP header checksumming on
1832      p->ip._chksum = 0;
1833      newp->flags = NETIF_TXFLAG_IPCHECKSUM;
1834#endif
1835    }
1836
1837/* #ifdef DEBUG_LATENCIES */
1838/*     if(memcache_transactions[4] < POSIX_TRANSA) { */
1839/*         if(msg->msg_iovlen > 1 && msg->msg_iov[1].iov_len == sizeof(protocol_binary_response_no_extras)) { */
1840/*             protocol_binary_response_no_extras *mypayload = msg->msg_iov[1].iov_base; */
1841/*             memcache_times[4][memcache_transactions[4]] = get_time() - mypayload->message.header.response.opaque; */
1842/*             memcache_transactions[4]++; */
1843/*         } else if(msg->msg_iov[0].iov_len >= sizeof(protocol_binary_request_no_extras)) { */
1844/*             protocol_binary_request_no_extras *mypayload = msg->msg_iov[0].iov_base + UDP_HEADLEN; */
1845/*             memcache_times[4][memcache_transactions[4]] = get_time() - mypayload->message.header.request.opaque; */
1846/*             memcache_transactions[4]++; */
1847/*         } */
1848/*     } */
1849/* #endif */
1850
1851    packet_output(newp);
1852
1853/* #ifdef DEBUG_LATENCIES */
1854/*     if(memcache_transactions[5] < POSIX_TRANSA) { */
1855/*         if(msg->msg_iovlen > 1 && msg->msg_iov[1].iov_len == sizeof(protocol_binary_response_no_extras)) { */
1856/*             protocol_binary_response_no_extras *mypayload = msg->msg_iov[1].iov_base; */
1857/*             memcache_times[5][memcache_transactions[5]] = get_time() - mypayload->message.header.response.opaque; */
1858/*             memcache_transactions[5]++; */
1859/*         } else if(msg->msg_iov[0].iov_len >= sizeof(protocol_binary_request_no_extras)) { */
1860/*             protocol_binary_request_no_extras *mypayload = msg->msg_iov[0].iov_base + UDP_HEADLEN; */
1861/*             memcache_times[5][memcache_transactions[5]] = get_time() - mypayload->message.header.request.opaque; */
1862/*             memcache_transactions[5]++; */
1863/*         } */
1864/*     } */
1865/* #endif */
1866
1867    return short_size;
1868}
1869
1870#else
1871
1872int lwip_sendmsg(int sockfd, const struct msghdr *msg, int flags)
1873{
1874    struct socket *sock = &sockets[sockfd];
1875    ssize_t short_size = 0;
1876    struct packet packets[MAX_SENDMSG];
1877    struct packet *oldp = NULL;
1878
1879    assert(msg->msg_iovlen < MAX_SENDMSG);
1880
1881    for(int i = 0; i < msg->msg_iovlen; i++) {
1882        struct packet *newp = &packets[i];
1883
1884        newp->payload = (uint8_t *)msg->msg_iov[i].iov_base;
1885        newp->len = msg->msg_iov[i].iov_len;
1886        newp->next = NULL;
1887        newp->flags = 0;
1888        newp->opaque = msg->msg_iov[i].iov_opaque;
1889        if(oldp != NULL) {
1890            oldp->next = newp;
1891        }
1892        short_size += msg->msg_iov[i].iov_len;
1893        oldp = newp;
1894    }
1895
1896    // Slap UDP/IP/Ethernet headers in front
1897    struct packet *hdrpkt = get_tx_packet();
1898    memcpy(hdrpkt->payload, &packet_udp_header, sizeof(struct pkt_udp_headers));
1899    struct pkt_udp_headers *p = (struct pkt_udp_headers *)hdrpkt->payload;
1900    hdrpkt->len = sizeof(struct pkt_udp_headers);
1901    hdrpkt->next = packets;
1902    hdrpkt->opaque = hdrpkt;
1903
1904    // Fine-tune headers
1905    assert(msg->msg_name != NULL);
1906    struct sockaddr_in *saddr = msg->msg_name;
1907    assert(saddr->sin_family == AF_INET);
1908    p->ip.dest.addr = saddr->sin_addr.s_addr;
1909    p->udp.dest = saddr->sin_port;
1910    struct peer *peer = peers_get_from_ip(p->ip.dest.addr);
1911    p->eth.dest = peer->mac;
1912    assert(sock->bound_addr.sin_port != 0);
1913    p->udp.src = sock->bound_addr.sin_port;
1914    p->udp.len = htons(short_size + sizeof(struct udp_hdr));
1915    p->ip._len = htons(short_size + sizeof(struct udp_hdr) + IP_HLEN);
1916#ifdef CONFIG_QEMU_NETWORK
1917    p->ip._chksum = inet_chksum(&p->ip, IP_HLEN);
1918    hdrpkt->flags = 0;
1919#else
1920    // Hardware IP header checksumming on
1921    p->ip._chksum = 0;
1922    hdrpkt->flags = NETIF_TXFLAG_IPCHECKSUM;
1923#endif
1924
1925    packet_output(hdrpkt);
1926
1927    // If we sent the data directly, we need to wait here until everything is out.
1928    // Else, data might be overwritten by application before card can send it.
1929    /* while(!e1000n_queue_empty()) thread_yield(); */
1930
1931    return short_size;
1932}
1933
1934#endif
1935
1936int lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
1937{
1938    assert(arranet_tcp_accepted);
1939    struct socket *sock = &sockets[s];
1940    assert(sock->passive);
1941    struct socket *newsock = alloc_socket();
1942    newsock->nonblocking = sock->nonblocking;
1943    newsock->bound_addr = sock->bound_addr;
1944    newsock->type = sock->type;
1945    socklen_t adlen = sizeof(struct sockaddr_in);
1946    struct recv_tcp_args args = {
1947        .buf = NULL,
1948        .len = 0,
1949        .src_addr = (struct sockaddr *)&newsock->peer_addr,
1950        .addrlen = &adlen,
1951        .syn = false,
1952        .sock = newsock,
1953    };
1954    struct waitset ws;
1955    waitset_init(&ws);
1956
1957    errval_t err = waitset_chan_register_polled(&ws, &recv_chanstate,
1958                                                MKCLOSURE(sock_recved_tcp_packet, &args));
1959    assert(err_is_ok(err));
1960
1961    /* if socket is ready, trigger event right away */
1962    if (lwip_sock_ready_read(s)) {
1963        err = waitset_chan_trigger(&recv_chanstate);
1964        assert(err_is_ok(err));
1965    }
1966
1967    if(sock->nonblocking) {
1968        err = event_dispatch_non_block(&ws);
1969        if(err_no(err) == LIB_ERR_NO_EVENT) {   // Deregister if it didn't fire
1970            err = waitset_chan_deregister(&recv_chanstate);
1971            assert(err_is_ok(err));
1972        }
1973
1974        if(err_no(err) == LIB_ERR_NO_EVENT || !args.syn) {
1975            free_socket(newsock);
1976            errno = EAGAIN;
1977            return -1;
1978        } else {
1979            assert(err_is_ok(err));
1980        }
1981
1982        if(!args.syn) {
1983            free_socket(newsock);
1984            errno = EAGAIN;
1985            return -1;
1986        }
1987    } else {
1988        err = event_dispatch(&ws);
1989        assert(err_is_ok(err));
1990
1991        if(!args.syn) {
1992            assert(!"Will block forever");
1993        }
1994    }
1995
1996    assert(adlen == sizeof(struct sockaddr_in));
1997    assert(*addrlen >= sizeof(struct sockaddr_in));
1998    // Set caller's addr buffers
1999    if(addr != NULL) {
2000        memcpy(addr, &newsock->peer_addr, sizeof(struct sockaddr_in));
2001        *addrlen = adlen;
2002    }
2003
2004    /* newsock->my_seq = 0; */
2005    newsock->peer_seq = htonl(args.in_seqno);
2006    /* printf("lwip_accept: Assigning %p seq %x\n", newsock, newsock->my_seq); */
2007
2008#ifdef SENDMSG_WITH_COPY
2009    // Get new TX packet and send SYN-ACK
2010    struct packet *newp = get_tx_packet();
2011    newp->len = sizeof(struct pkt_tcp_headers) + 4;
2012    newp->next = NULL;
2013
2014    // Slap TCP/IP/Ethernet headers in front
2015    memcpy(newp->payload, &packet_tcp_header, sizeof(struct pkt_tcp_headers));
2016
2017    // Fine-tune headers
2018    struct pkt_tcp_headers *p = (struct pkt_tcp_headers *)newp->payload;
2019    uint32_t *payload = (void *)p + sizeof(struct pkt_tcp_headers);
2020    memset(payload, 0, 4);
2021    p->ip.dest.addr = newsock->peer_addr.sin_addr.s_addr;
2022    p->tcp.dest = newsock->peer_addr.sin_port;
2023    struct peer *peer = peers_get_from_ip(p->ip.dest.addr);
2024    p->eth.dest = peer->mac;
2025    assert(sock->bound_addr.sin_port != 0);
2026    p->tcp.src = sock->bound_addr.sin_port;
2027    p->ip._len = htons(sizeof(struct tcp_hdr) + IP_HLEN + 4);
2028    p->tcp.seqno = htonl(++newsock->my_seq); newsock->my_seq++;
2029    /* printf("lwip_accept: Assigning %p seq %x\n", newsock, newsock->my_seq); */
2030    newsock->next_ack = newsock->peer_seq + 1;
2031    /* printf("lwip_accept: Assigning %p, %x\n", newsock, newsock->next_ack); */
2032    p->tcp.ackno = htonl(newsock->next_ack);
2033    /* printf("lwip_accept: Sending %p, %x\n", newsock, newsock->next_ack); */
2034    TCPH_FLAGS_SET(&p->tcp, TCP_SYN | TCP_ACK); // Set SYN-ACK
2035    TCPH_HDRLEN_SET(&p->tcp, 6);   // 24 / 4
2036    p->tcp.wnd = htons(11680);
2037    *payload = TCP_BUILD_MSS_OPTION(1460);
2038#ifdef CONFIG_QEMU_NETWORK
2039    p->ip._chksum = inet_chksum(&p->ip, IP_HLEN);
2040    p->tcp.chksum = 0;
2041    newp->payload = (uint8_t *)&p->tcp;
2042    newp->len -= (uint8_t *)&p->tcp - (uint8_t *)p;
2043    p->tcp.chksum = inet_chksum_pseudo(newp, (ip_addr_t *)&p->ip.src, (ip_addr_t *)&p->ip.dest,
2044                                       IP_PROTO_TCP, TCP_HLEN + 4);
2045    newp->payload = (uint8_t *)p;
2046    newp->len = sizeof(struct pkt_tcp_headers) + 4;
2047    newp->flags = 0;
2048#else
2049    // Hardware IP/TCP header checksumming on
2050    p->ip._chksum = 0;
2051    p->tcp.chksum =
2052        (~inet_chksum_pseudo_partial(newp, (ip_addr_t *)&p->ip.src, (ip_addr_t *)&p->ip.dest,
2053                                     IP_PROTO_TCP, TCP_HLEN + 4, 0)) & 0xffff;
2054    newp->flags = (NETIF_TXFLAG_IPCHECKSUM | NETIF_TXFLAG_TCPCHECKSUM) |
2055        (6 << NETIF_TXFLAG_TCPHDRLEN_SHIFT);
2056#endif
2057
2058    packet_output(newp);
2059#else
2060    assert(!"NYI");
2061#endif
2062
2063    /* printf("Returned %d\n", newsock->fd); */
2064    newsock->connected = true;
2065    assert(newsock->prev == NULL && newsock->next == NULL);
2066    newsock->next = connections;
2067    if(connections != NULL) {
2068        assert(connections->prev == NULL);
2069        connections->prev = newsock;
2070    }
2071    newsock->prev = NULL;
2072    connections = newsock;
2073
2074    /* printf("lwip_accept(%d) = %d\n", s, newsock->fd); */
2075    return newsock->fd;
2076}
2077
2078void process_received_packet(struct driver_rx_buffer *buffer, size_t count,
2079                             uint64_t flags)
2080{
2081    struct packet *p = buffer->opaque;
2082    assert(p != NULL);
2083    assert(count == 1);
2084    p->len = buffer->len;
2085
2086    /* printf("Got %p from driver\n", p); */
2087
2088    assert(p >= rx_packets && p < &rx_packets[MAX_PACKETS]);
2089
2090#ifdef DEBUG_LATENCIES
2091    rx_packets_available--;
2092    if(rx_packets_available < 10) {
2093        printf("Too many RX packets in flight!\n");
2094    }
2095#endif
2096
2097    // Drop packets with invalid checksums
2098    if(flags & NETIF_RXFLAG_IPCHECKSUM) {
2099        if(!(flags & NETIF_RXFLAG_IPCHECKSUM_GOOD)) {
2100            goto out;
2101        }
2102    }
2103
2104    if(flags & NETIF_RXFLAG_L4CHECKSUM) {
2105        if(!(flags & NETIF_RXFLAG_L4CHECKSUM_GOOD)) {
2106            goto out;
2107        }
2108    }
2109
2110    struct eth_hdr *ethhdr = (struct eth_hdr *)p->payload;
2111    switch (htons(ethhdr->type)) {
2112    case ETHTYPE_ARP:
2113        {
2114            struct etharp_hdr *arphdr = (struct etharp_hdr *)(p->payload + SIZEOF_ETH_HDR);
2115            uint32_t dipaddr = (arphdr->dipaddr.addrw[1] << 16) | arphdr->dipaddr.addrw[0];
2116
2117            /* printf("%d: ARP request, dip = %x\n", disp_get_core_id(), dipaddr); */
2118
2119            if(htons(arphdr->opcode) == ARP_REQUEST &&
2120               dipaddr == arranet_myip) {
2121                // Send reply
2122                struct packet outp;
2123		// XXX: Static payload! Need to lock if multithreaded!
2124                static uint8_t payload[PACKET_SIZE];
2125                struct eth_hdr *myeth = (struct eth_hdr *)payload;
2126                struct etharp_hdr *myarp = (struct etharp_hdr *)(payload + SIZEOF_ETH_HDR);
2127
2128                /* printf("%d: ARP request for us!\n", disp_get_core_id()); */
2129
2130                // ETH header
2131                memcpy(&myeth->dest, &arphdr->shwaddr, ETHARP_HWADDR_LEN);
2132                memcpy(&myeth->src, arranet_mymac, ETHARP_HWADDR_LEN);
2133                myeth->type = htons(ETHTYPE_ARP);
2134
2135                // ARP header
2136                myarp->hwtype = htons(1);
2137                myarp->proto = htons(ETHTYPE_IP);
2138                myarp->hwlen = 6;
2139                myarp->protolen = 4;
2140                myarp->opcode = htons(ARP_REPLY);
2141                memcpy(&myarp->shwaddr, arranet_mymac, ETHARP_HWADDR_LEN);
2142                memcpy(&myarp->sipaddr, &arphdr->dipaddr, sizeof(myarp->sipaddr));
2143                memcpy(&myarp->dhwaddr, &arphdr->shwaddr, ETHARP_HWADDR_LEN);
2144                memcpy(&myarp->dipaddr, &arphdr->sipaddr, sizeof(myarp->dipaddr));
2145
2146                outp.payload = payload;
2147                outp.len = SIZEOF_ETHARP_PACKET;
2148                /* outp.len = p->len; */
2149                outp.next = NULL;
2150                outp.flags = 0;
2151                outp.opaque = NULL;
2152
2153                packet_output(&outp);
2154		static int arp_count = 0;
2155		arp_count++;
2156		if(arp_count > 100) {
2157		  printf("High ARP count!\n");
2158		}
2159                while(!e1000n_queue_empty()) thread_yield();
2160            }
2161        }
2162        break;
2163
2164    case ETHTYPE_IP:
2165        {
2166            struct ip_hdr *iphdr = (struct ip_hdr *)(p->payload + SIZEOF_ETH_HDR);
2167
2168            /* printf("%d: Is an IP packet, type %x\n", disp_get_core_id(), IPH_PROTO(iphdr)); */
2169
2170#ifdef DEBUG_LATENCIES
2171            if(IPH_PROTO(iphdr) == IP_PROTO_ICMP) {
2172                static uint64_t cache_misses = 0;
2173                uint64_t new_cache_misses = rdpmc(0);
2174                printf("Cache misses = %" PRIu64 "\n", new_cache_misses - cache_misses);
2175                cache_misses = new_cache_misses;
2176
2177                printf("hash_option1 = %zd, hash_option2 = %zd, hash_option3 = %zd\n",
2178                       hash_option1, hash_option2, hash_option3);
2179                printf("hash_calls = %zd, hash_length = %zd\n",
2180                       hash_calls, hash_length);
2181
2182                printf("output pipeline stalled = %zd\n", output_pipeline_stalled);
2183                output_pipeline_stalled = 0;
2184
2185                printf("memcache packets received = %zd\n", memcache_packets_received);
2186                memcache_packets_received = 0;
2187                for(int i = 0; i < 65536; i++) {
2188                    if(port_cnt[i] != 0) {
2189                        printf("port %d = %zu\n", i, port_cnt[i]);
2190                        port_cnt[i] = 0;
2191                    }
2192                }
2193
2194                printf("recv_transa = %zu, send_transa = %zu\n",
2195                       posix_recv_transactions, posix_send_transactions);
2196                printf("posix_recv_transactions:\n");
2197                for(int i = 0; i < posix_recv_transactions; i++) {
2198                    printf("%u us\n", posix_recv_time[i]);
2199                }
2200                printf("posix_send_transactions:\n");
2201                for(int i = 0; i < posix_send_transactions; i++) {
2202                    printf("%u us\n", posix_send_time[i]);
2203                }
2204                posix_recv_transactions = posix_send_transactions = 0;
2205
2206                printf("lwip_send_transa = %zu\n", lwip_send_transactions);
2207                printf("lwip_send_transactions:\n");
2208                for(int i = 0; i < lwip_send_transactions; i++) {
2209                    printf("%u us\n", lwip_send_time[i]);
2210                }
2211                lwip_send_transactions = 0;
2212
2213                for(int j = 0; j < 20; j++) {
2214                    printf("memcache_transa[%d] = %zu:\n", j, memcache_transactions[j]);
2215                    for(int i = 0; i < memcache_transactions[j]; i++) {
2216                        printf("%u us\n", memcache_times[j][i]);
2217                    }
2218                    memcache_transactions[j] = 0;
2219                }
2220            }
2221#endif
2222
2223            // Has to be UDP or TCP
2224            if(IPH_PROTO(iphdr) != IP_PROTO_UDP && IPH_PROTO(iphdr) != IP_PROTO_TCP) {
2225                goto out;
2226            }
2227
2228            // XXX: Filter for our IP
2229            if(iphdr->dest.addr != arranet_myip) {
2230                goto out;
2231            }
2232
2233	    // Take raw IP packets if that's accepted and ignore the rest
2234	    if(arranet_raw_accepted) {
2235                // XXX: Accept only TCP for now
2236                if(IPH_PROTO(iphdr) == IP_PROTO_TCP) {
2237                    goto accept;
2238                } else {
2239                    goto out;
2240                }
2241	    }
2242
2243            if(IPH_PROTO(iphdr) == IP_PROTO_UDP) {
2244                struct udp_hdr *udphdr = (struct udp_hdr *)(p->payload + SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4));
2245                /* uint8_t *payload = p->payload + SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4) + sizeof(struct udp_hdr); */
2246
2247                // Are we accepting UDP packets?
2248                if(!arranet_udp_accepted) {
2249                    goto out;
2250                }
2251
2252                /* printf("Got UDP packet, dest IP %x, dest port %u\n", */
2253                /*        htonl(iphdr->dest.addr), htons(udphdr->dest)); */
2254
2255                // XXX: Filter for UDP ports 1234, 11211, 11212
2256                // TODO: Done in hardware soon
2257                if(htons(udphdr->dest) != 1234 &&
2258                   htons(udphdr->dest) != 11211 &&
2259                   htons(udphdr->dest) != 11212) {
2260                    goto out;
2261                }
2262
2263#ifdef DEBUG_LATENCIES
2264                {
2265                    memcache_packets_received++;
2266                    port_cnt[htons(udphdr->src)]++;
2267                    protocol_binary_request_no_extras *mypayload = (void *)p->payload + SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4) + sizeof(struct udp_hdr) + UDP_HEADLEN;
2268                    mypayload->message.header.request.opaque = get_time();
2269                }
2270#endif
2271            }
2272
2273            if(IPH_PROTO(iphdr) == IP_PROTO_TCP) {
2274                struct tcp_hdr *tcphdr = (struct tcp_hdr *)(p->payload + SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4));
2275
2276                // Are we accepting TCP packets?
2277                if(!arranet_tcp_accepted) {
2278                    goto out;
2279                }
2280
2281                /* printf("Got TCP packet, dest IP %x, src IP %x, dest port %u, src %u\n", */
2282                /*        htonl(iphdr->dest.addr), htonl(iphdr->src.addr), */
2283                /*        htons(tcphdr->dest), htons(tcphdr->src)); */
2284
2285                // XXX: Filter for TCP port 8080 and everything that we know
2286                // TODO: Done in hardware soon
2287                struct socket *sock = NULL;
2288                for(sock = connections; sock != NULL; sock = sock->next) {
2289                    if(sock->bound_addr.sin_port == tcphdr->dest &&
2290                       sock->peer_addr.sin_port == tcphdr->src &&
2291                       sock->peer_addr.sin_addr.s_addr == iphdr->src.addr) {
2292                        break;
2293                    }
2294                }
2295
2296                p->sock = sock;
2297
2298                // Handle SYN-ACKs for connections we created and FIN-ACKs
2299                // Also ACK any data packet that came in
2300                uint16_t pkt_len = htons(IPH_LEN(iphdr)) - (TCPH_HDRLEN(tcphdr) * 4) - (IPH_HL(iphdr) * 4);
2301                if((TCPH_FLAGS(tcphdr) & TCP_ACK) &&
2302                   ((TCPH_FLAGS(tcphdr) & TCP_SYN) || (TCPH_FLAGS(tcphdr) & TCP_FIN) || pkt_len > 0)) {
2303                    bool is_retransmit = false;
2304
2305                    if(TCPH_FLAGS(tcphdr) & TCP_SYN) {
2306                        assert(sock != NULL);
2307                        sock->connected = true;
2308                    }
2309                    if((TCPH_FLAGS(tcphdr) & TCP_FIN) && sock != NULL) {
2310                        // It said FIN, so we're not expecting any more from that side
2311                        sock->connected = false;
2312                        sock->hangup = true;
2313                        // Signal application
2314                        if (waitset_chan_is_registered(&recv_chanstate)) {
2315                            errval_t err = waitset_chan_trigger(&recv_chanstate);
2316                            assert(err_is_ok(err));
2317                        }
2318
2319                        if(sock->prev != NULL) {
2320                            sock->prev->next = sock->next;
2321                        }
2322                        if(sock->next != NULL) {
2323                            sock->next->prev = sock->prev;
2324                        }
2325                        if(connections == sock) {
2326                            connections = sock->next;
2327                        }
2328                        sock->next = sock->prev = NULL;
2329                    }
2330
2331                    if(sock != NULL) {
2332                        uint32_t new_peer_seq = htonl(tcphdr->seqno);
2333                        if(new_peer_seq == sock->peer_seq &&
2334                           new_peer_seq + pkt_len == sock->next_ack) {
2335                            is_retransmit = true;
2336                            /* printf("Is a retransmit! dst = %u, src = %u, seq = %u, ack = %u\n", */
2337                            /*        htons(tcphdr->dest), htons(tcphdr->src), */
2338                            /*        htonl(tcphdr->seqno), htonl(tcphdr->ackno)); */
2339                        }
2340                        sock->peer_seq = new_peer_seq;
2341                        sock->next_ack = sock->peer_seq + pkt_len;
2342                        if((TCPH_FLAGS(tcphdr) & TCP_SYN) || (TCPH_FLAGS(tcphdr) & TCP_FIN)) {
2343                            sock->next_ack++;
2344                        }
2345                        /* printf("process_received_packet: Assigning %p, %x\n", sock, sock->next_ack); */
2346                    }
2347
2348                    // Get new TX packet and send ACK
2349                    struct packet *newp = get_tx_packet();
2350                    newp->len = sizeof(struct pkt_tcp_headers);
2351                    newp->next = NULL;
2352
2353                    // Slap TCP/IP/Ethernet headers in front
2354                    memcpy(newp->payload, &packet_tcp_header, sizeof(struct pkt_tcp_headers));
2355
2356                    // Fine-tune headers
2357                    struct pkt_tcp_headers *ph = (struct pkt_tcp_headers *)newp->payload;
2358                    ph->ip.dest.addr = iphdr->src.addr;
2359                    ph->tcp.dest = tcphdr->src;
2360                    ph->eth.dest = ethhdr->src;
2361                    ph->tcp.src = tcphdr->dest;
2362                    ph->ip._len = htons(sizeof(struct tcp_hdr) + IP_HLEN);
2363                    if(sock != NULL) {
2364                        ph->tcp.seqno = htonl(sock->my_seq);
2365                        ph->tcp.ackno = htonl(sock->next_ack);
2366                        /* printf("process_received_packet: Sending %p, seq %x, ack %x\n", sock, sock->my_seq, sock->next_ack); */
2367                    } else {
2368                        ph->tcp.seqno = tcphdr->ackno;
2369                        ph->tcp.ackno = htonl(htonl(tcphdr->seqno) + pkt_len + 1);
2370                    }
2371                    TCPH_FLAGS_SET(&ph->tcp, TCP_ACK);
2372                    TCPH_HDRLEN_SET(&ph->tcp, 5);   // 20 / 4
2373                    ph->tcp.wnd = htons(11680);
2374#ifdef CONFIG_QEMU_NETWORK
2375                    ph->ip._chksum = inet_chksum(&ph->ip, IP_HLEN);
2376                    ph->tcp.chksum = 0;
2377                    void *oldpayload = newp->payload;
2378                    size_t oldlen = newp->len;
2379                    newp->payload = (uint8_t *)&ph->tcp;
2380                    newp->len -= (uint8_t *)&ph->tcp - (uint8_t *)oldpayload;
2381                    ph->tcp.chksum = inet_chksum_pseudo(newp, (ip_addr_t *)&ph->ip.src, (ip_addr_t *)&ph->ip.dest,
2382                                                       IP_PROTO_TCP, TCP_HLEN);
2383                    newp->payload = oldpayload;
2384                    newp->len = oldlen;
2385                    newp->flags = 0;
2386#else
2387                    // Hardware IP/TCP header checksumming on
2388                    ph->ip._chksum = 0;
2389                    ph->tcp.chksum =
2390                        (~inet_chksum_pseudo_partial(newp, (ip_addr_t *)&ph->ip.src, (ip_addr_t *)&ph->ip.dest,
2391                                                     IP_PROTO_TCP, TCP_HLEN, 0)) & 0xffff;
2392                    newp->flags = (NETIF_TXFLAG_IPCHECKSUM | NETIF_TXFLAG_TCPCHECKSUM) |
2393                        (TCPH_HDRLEN(&ph->tcp) << NETIF_TXFLAG_TCPHDRLEN_SHIFT);
2394#endif
2395
2396                    packet_output(newp);
2397
2398                    // Ignore retransmits -- we've already ACKed them again
2399                    if(is_retransmit) {
2400                        goto out;
2401                    }
2402                }
2403
2404                if(sock == NULL) {
2405                    if(htons(tcphdr->dest) != 8080) {
2406                        goto out;
2407                    } else if(!(TCPH_FLAGS(tcphdr) & TCP_SYN)) {
2408                        /* size_t psize = htons(iphdr->_len) - (TCPH_HDRLEN(tcphdr) * 4); */
2409                        /* if(psize > IPH_HL(iphdr) * 4) { */
2410                        /*     printf("Dropping 8080 data packet! src port = %u, payload size = %zu\n", htons(tcphdr->src), psize); */
2411                        /* } */
2412                        goto out;
2413                    }
2414                }
2415
2416                // Ignore stray ACKs, signaling connection establishments
2417                // This will also throw away empty FIN-ACKs
2418                if((TCPH_FLAGS(tcphdr) & TCP_ACK) &&
2419                   htons(iphdr->_len) - (TCPH_HDRLEN(tcphdr) * 4) == IPH_HL(iphdr) * 4) {
2420                    goto out;
2421                }
2422
2423#ifdef DEBUG_LATENCIES
2424                {
2425                    memcache_packets_received++;
2426                    port_cnt[htons(tcphdr->src)]++;
2427                    /* protocol_binary_request_no_extras *mypayload = (void *)p->payload + SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4) + sizeof(struct udp_hdr) + UDP_HEADLEN; */
2428                    /* mypayload->message.header.request.opaque = get_time(); */
2429                }
2430#endif
2431            }
2432
2433	accept:
2434            // ARP management
2435            if(peers_get_from_ip(iphdr->src.addr) == NULL) {
2436                struct peer *newpeer = peers_get_next_free();
2437                assert(p != NULL);
2438
2439                newpeer->ip = iphdr->src.addr;
2440                memcpy(&newpeer->mac.addr, &ethhdr->src.addr, ETHARP_HWADDR_LEN);
2441            }
2442
2443            // Push packets up - signal channel
2444            assert(inpkt == NULL);
2445            inpkt = p;
2446            if (waitset_chan_is_registered(&recv_chanstate)) {
2447                errval_t err = waitset_chan_trigger(&recv_chanstate);
2448                assert(err_is_ok(err));
2449            }
2450
2451            // Return here, packet is in flight to user-space
2452            return;
2453        }
2454        break;
2455
2456    default:
2457        break;
2458    }
2459
2460 out:
2461    {
2462        //now we have consumed the preregistered pbuf containing a received packet
2463        //which was processed in this function. Therefore we have to register a new
2464        //free buffer for receiving packets.
2465#ifdef DEBUG_LATENCIES
2466        rx_packets_available++;
2467#endif
2468        errval_t err = rx_register_buffer_fn_ptr(p->pa, p->payload, p);
2469        assert(err_is_ok(err));
2470    }
2471}
2472
2473static arranet_tx_done_fn arranet_tx_done_callback = NULL;
2474
2475void arranet_register_tx_done_callback(arranet_tx_done_fn callback)
2476{
2477    arranet_tx_done_callback = callback;
2478}
2479
2480bool handle_tx_done(void *opaque)
2481{
2482    struct packet *p = opaque;
2483    if(p >= tx_packets && p < &tx_packets[MAX_PACKETS]) {
2484        /* printf("Packet from TX ring, marking available\n"); */
2485        // Mark packet as available, if coming from TX packet array
2486        p->len = 0;
2487        /* tx_packets_available++; */
2488#ifndef SENDMSG_WITH_COPY
2489    } else {
2490        if(opaque != NULL && arranet_tx_done_callback != NULL) {
2491            /* printf("Packet from app, handing up\n"); */
2492            arranet_tx_done_callback(opaque);
2493        /* } else { */
2494        /*     if(opaque == NULL) { */
2495        /*         printf("NULL packet\n"); */
2496        /*     } */
2497        }
2498#endif
2499    }
2500
2501    return true;
2502}
2503
2504/* allocate a single frame, mapping it into our vspace with given attributes */
2505static void *alloc_map_frame(vregion_flags_t attr, size_t size, struct capref *retcap)
2506{
2507    struct capref frame;
2508    errval_t r;
2509
2510    r = frame_alloc(&frame, size, NULL);
2511    assert(err_is_ok(r));
2512    void *va;
2513    r = vspace_map_one_frame_attr(&va, size, frame, attr,
2514                                  NULL, NULL);
2515    if (err_is_fail(r)) {
2516        DEBUG_ERR(r, "vspace_map_one_frame failed");
2517        return NULL;
2518    }
2519
2520    if (retcap != NULL) {
2521        *retcap = frame;
2522    }
2523
2524    return va;
2525}
2526
2527bool lwip_sock_is_open(int s)
2528{
2529    if(arranet_tcp_accepted) {
2530        struct socket *sock = &sockets[s];
2531        assert(sock != NULL);
2532        return !sock->hangup;
2533    } else {
2534        // XXX: Not supported on UDP yet...
2535        return true;
2536    }
2537}
2538
2539/**
2540 * \brief Check if a read on the socket would not block.
2541 *
2542 * \param socket    Socket to check.
2543 * \return          Whether or not the socket is ready.
2544 */
2545bool lwip_sock_ready_read(int s)
2546{
2547    if(arranet_tcp_accepted) {
2548        /* printf("lwip_sock_ready_read(%d)\n", s); */
2549        if(inpkt != NULL) {
2550            struct socket *sock = &sockets[s];
2551            assert(sock != NULL);
2552            struct ip_hdr *iphdr = (struct ip_hdr *)(inpkt->payload + SIZEOF_ETH_HDR);
2553            assert(IPH_PROTO(iphdr) == IP_PROTO_TCP);
2554            struct tcp_hdr *tcphdr = (struct tcp_hdr *)(inpkt->payload + SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4));
2555            if(sock->passive) {
2556                return (TCPH_FLAGS(tcphdr) & TCP_SYN) ? true : false;
2557            } else {
2558                if(TCPH_FLAGS(tcphdr) & TCP_SYN) {
2559                    return false;
2560                }
2561
2562                if(tcphdr->dest == sock->bound_addr.sin_port &&
2563                   tcphdr->src == sock->peer_addr.sin_port) {
2564                    return true;
2565                } else {
2566#if 0
2567                    // XXX: Remove when code works...
2568                    struct fdtab_entry e;
2569                    e.type = FDTAB_TYPE_LWIP_SOCKET;
2570                    e.fd = inpkt->sock->fd;
2571                    int rfd = fdtab_search(&e);
2572                    struct fdtab_entry *ne = fdtab_get_sane(rfd);
2573                    if(ne->epoll_fd == -1) {
2574                        printf("Sock: %d, Last: %p %p %p\n", inpkt->sock->fd,
2575                               ne->last[0], ne->last[1], ne->last[2]);
2576
2577                        // Drop the packet
2578#ifdef DEBUG_LATENCIES
2579                        rx_packets_available++;
2580#endif
2581                        errval_t err = rx_register_buffer_fn_ptr(inpkt->pa, inpkt->payload, inpkt);
2582                        assert(err_is_ok(err));
2583                        inpkt = NULL;
2584                    }
2585                    /* assert(ne->epoll_fd != -1); */
2586#endif
2587
2588                    return false;
2589                }
2590            }
2591        } else {
2592            return false;
2593        }
2594    } else {
2595        assert(arranet_udp_accepted || arranet_raw_accepted);
2596        return inpkt != NULL;
2597    }
2598}
2599
2600/**
2601 * \brief Check if a write on the socket would not block.
2602 *
2603 * \param socket    Socket to check.
2604 * \return          Whether or not the socket is ready.
2605 */
2606bool lwip_sock_ready_write(int s)
2607{
2608    if(arranet_tcp_accepted) {
2609        struct socket *sock = &sockets[s];
2610        assert(sock != NULL);
2611        if(sock->connected) {
2612            // See if there's space in the queue
2613#ifdef SENDMSG_WITH_COPY
2614            /* if(tx_packets[tx_idx].len != 0) { */
2615            /*     return false; */
2616            /* } else { */
2617            // XXX: Just always return true, we will stall a little when calling get_tx_packet() if no output packet is available then.
2618                return true;
2619            /* } */
2620#else
2621            assert(!"NYI");
2622#endif
2623        } else {
2624            return false;
2625        }
2626        /* printf("lwip_sock_ready_write(%d)\n", s); */
2627    } else {
2628        assert(arranet_udp_accepted);
2629
2630        return tx_packets[tx_idx].len == 0 ? true : false;
2631        // XXX: Can also return true when one buffer is available in queue
2632        // return e1000n_queue_empty();
2633    }
2634}
2635
/**
 * \brief Empty event handler; used as the closure for the polled socket channels.
 *
 * \param arg   Ignored.
 */
static void do_nothing(void *arg)
{
    (void)arg;
}
2639
2640/**
2641 * \brief Deregister previously registered waitset on which an event is delivered
2642 *        when the socket is ready for reading.
2643 */
2644errval_t lwip_sock_waitset_deregister_read(int sock)
2645{
2646    return waitset_chan_deregister(&recv_chanstate);
2647}
2648
2649/**
2650 * \brief Register a waitset on which an event is delivered when the socket is
2651 *        ready for reading.
2652 *
2653 * The event is triggered ONCE, when the socket becomes ready for reading. If
2654 * the socket is already ready, the event is triggered right away.
2655 *
2656 * \param socket    Socket
2657 * \param ws        Waitset
2658 */
2659errval_t lwip_sock_waitset_register_read(int sock, struct waitset *ws)
2660{
2661    errval_t err;
2662
2663    assert(ws != NULL);
2664
2665    if(waitset_chan_is_registered(&recv_chanstate) || recv_chanstate.state == CHAN_PENDING) {
2666        assert(recv_chanstate.waitset == ws);
2667        return SYS_ERR_OK;
2668    }
2669
2670    waitset_chanstate_init(&recv_chanstate, CHANTYPE_LWIP_SOCKET);
2671
2672    err = waitset_chan_register_polled(ws, &recv_chanstate,
2673                                       MKCLOSURE(do_nothing, NULL));
2674    if (err_is_fail(err)) {
2675        DEBUG_ERR(err, "Error register recv channel on waitset.");
2676        return err;
2677    }
2678
2679    /* if socket is ready, trigger event right away */
2680    if (lwip_sock_ready_read(sock)) {
2681        err = waitset_chan_trigger(&recv_chanstate);
2682        if (err_is_fail(err)) {
2683            DEBUG_ERR(err, "Error trigger event on recv channel.");
2684            return err;
2685        }
2686    }
2687
2688    return SYS_ERR_OK;
2689}
2690
2691/**
2692 * \brief Deregister previously registered waitset on which an event is delivered
2693 *        when the socket is ready for writing.
2694 */
2695errval_t lwip_sock_waitset_deregister_write(int sock)
2696{
2697    return waitset_chan_deregister(&send_chanstate);
2698}
2699
2700/**
2701 * \brief Register a waitset on which an event is delivered when the socket is
2702 *        ready for writing.
2703 *
2704 * The event is triggered ONCE, when the socket becomes ready for writing. If
2705 * the socket is already ready, the event is triggered right away.
2706 *
2707 * \param socket    Socket
2708 * \param ws        Waitset
2709 */
2710errval_t lwip_sock_waitset_register_write(int sock, struct waitset *ws)
2711{
2712    errval_t err;
2713
2714    assert(ws != NULL);
2715
2716    if(waitset_chan_is_registered(&send_chanstate) || send_chanstate.state == CHAN_PENDING) {
2717        assert(send_chanstate.waitset == ws);
2718        return SYS_ERR_OK;
2719    }
2720
2721    waitset_chanstate_init(&send_chanstate, CHANTYPE_LWIP_SOCKET);
2722
2723    err = waitset_chan_register_polled(ws, &send_chanstate,
2724                                       MKCLOSURE(do_nothing, NULL));
2725    if (err_is_fail(err)) {
2726        DEBUG_ERR(err, "Error register send channel on waitset.");
2727        return err;
2728    }
2729
2730    /* if socket is ready, trigger event right away */
2731    if (lwip_sock_ready_write(sock)) {
2732        err = waitset_chan_trigger(&send_chanstate);
2733        if (err_is_fail(err)) {
2734            DEBUG_ERR(err, "Error trigger event on send channel.");
2735            return err;
2736        }
2737    }
2738
2739    return SYS_ERR_OK;
2740}
2741
2742void arranet_polling_loop_proxy(void);
2743void arranet_polling_loop_proxy(void)
2744{
2745    // Push packets up - signal channel
2746    if(inpkt != NULL) {
2747#if 0
2748        static struct packet *lastinpkt = NULL;
2749        static int count = 0;
2750
2751        if(inpkt == lastinpkt) {
2752            count++;
2753            if(count > 1000) {
2754                struct ip_hdr *iphdr = (struct ip_hdr *)(inpkt->payload + SIZEOF_ETH_HDR);
2755                assert(IPH_PROTO(iphdr) == IP_PROTO_TCP);
2756                struct tcp_hdr *tcphdr = (struct tcp_hdr *)(inpkt->payload + SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4));
2757                size_t hdr_len = SIZEOF_ETH_HDR + (IPH_HL(iphdr) * 4) + (TCPH_HDRLEN(tcphdr) * 4);
2758                char *payload = (char *)inpkt->payload + hdr_len;
2759                uint16_t pkt_len = htons(IPH_LEN(iphdr)) - (TCPH_HDRLEN(tcphdr) * 4) - (IPH_HL(iphdr) * 4);
2760
2761                printf("Packet in queue too long, dst = %u, src = %u\n",
2762                       htons(tcphdr->dest), htons(tcphdr->src));
2763
2764                if(TCPH_FLAGS(tcphdr) & TCP_SYN) {
2765                    printf("SYN set\n");
2766                }
2767
2768                if(TCPH_FLAGS(tcphdr) & TCP_ACK) {
2769                    printf("ACK set\n");
2770                }
2771
2772                if(TCPH_FLAGS(tcphdr) & TCP_FIN) {
2773                    printf("FIN set\n");
2774                }
2775
2776                if(TCPH_FLAGS(tcphdr) & TCP_RST) {
2777                    printf("RST set\n");
2778                }
2779
2780                printf("Seq = %u, Ack = %u\n", tcphdr->seqno, tcphdr->ackno);
2781
2782                if(pkt_len > 0) {
2783                    printf("payload = '%s'\n", payload);
2784                }
2785
2786#ifdef DEBUG_LATENCIES
2787                rx_packets_available++;
2788#endif
2789                errval_t err = rx_register_buffer_fn_ptr(inpkt->pa, inpkt->payload, inpkt);
2790                assert(err_is_ok(err));
2791                inpkt = NULL;
2792            }
2793        } else {
2794            lastinpkt = inpkt;
2795            count = 0;
2796        }
2797#endif
2798
2799        if (waitset_chan_is_registered(&recv_chanstate)) {
2800            errval_t err = waitset_chan_trigger(&recv_chanstate);
2801            assert(err_is_ok(err));
2802        }
2803    } else {
2804        arranet_polling_loop();
2805    }
2806}
2807
// Command-line option prefixes that lwip_arrakis_start() removes from argv
// before handing the argument vector back to the caller.
// The list is terminated by a NULL entry.
static const char *eat_opts[] = {
    "function=", "interrupts=", "queue=", "msix=", "vf=", "device=", "bus=", "use_vtd=",
    NULL
};
2812
2813void lwip_arrakis_start(int *argc, char ***argv)
2814{
2815    uint8_t mac[6];
2816
2817    waitset_chanstate_init(&recv_chanstate, CHANTYPE_LWIP_SOCKET);
2818    waitset_chanstate_init(&send_chanstate, CHANTYPE_LWIP_SOCKET);
2819
2820    errval_t err = skb_client_connect();
2821    assert(err_is_ok(err));
2822
2823    err = skb_execute_query("vtd_enabled(0,C), write(vtd_coherency(C)).");
2824    if (err_is_ok(err)) {
2825        use_vtd = 1;
2826        for(int i = 0; i < *argc; i++) {
2827	    if(!strncmp((*argv)[i], "use_vtd=", strlen("use_vtd=") - 1)) {
2828	      use_vtd = !!atol((*argv)[i] + strlen("use_vtd="));
2829                break;
2830            }
2831        }
2832	err = skb_read_output("vtd_coherency(%d)", &vtd_coherency);
2833	assert(err_is_ok(err));
2834    }
2835
2836    if (use_vtd) {
2837        err = connect_to_acpi();
2838	assert(err_is_ok(err));
2839	err = vtd_create_domain(cap_vroot);
2840	assert(err_is_ok(err));
2841	err = vtd_domain_add_device(0, 13, 16, 1, cap_vroot);
2842	assert(err_is_ok(err));
2843    }
2844
2845    e1000n_driver_init(*argc, *argv);
2846
2847    ether_get_mac_address_ptr(mac);
2848    printf("Arranet MAC address %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx\n",
2849           mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
2850
2851    struct capref frame;
2852    uint8_t *ram_base = alloc_map_frame(VREGION_FLAGS_READ_WRITE,
2853                                        MAX_PACKETS * PACKET_SIZE + 4096, &frame);
2854    assert(ram_base != NULL);
2855
2856    struct frame_identity id;
2857    err = frame_identify(frame, &id);
2858    assert(err_is_ok(err));
2859
2860    rx_pbase = id.base;
2861    rx_vbase = (genvaddr_t)ram_base;
2862
2863    // Add buffers to RX ring for packet reception
2864    for(int i = 0; i < MAX_PACKETS; i++) {
2865        struct packet *p = &rx_packets[i];
2866
2867        // XXX: Use this for recvfrom_arranet to get alignment
2868        /* p->payload = ram_base + (i * PACKET_SIZE) + 6; */
2869        /* p->pa = id.base + (i * PACKET_SIZE) + 6; */
2870        p->payload = ram_base + (i * PACKET_SIZE);
2871        p->pa = id.base + (i * PACKET_SIZE);
2872        p->len = PACKET_SIZE;
2873        p->flags = 0;
2874
2875        err = rx_register_buffer_fn_ptr(p->pa, p->payload, p);
2876        assert(err_is_ok(err));
2877    }
2878
2879    // Allocate TX buffers (to have them all backed by one frame)
2880    uint8_t *tx_bufs = alloc_map_frame(VREGION_FLAGS_READ_WRITE,
2881                                       MAX_PACKETS * PACKET_SIZE, &frame);
2882    assert(tx_bufs != NULL);
2883
2884    err = frame_identify(frame, &id);
2885    assert(err_is_ok(err));
2886    tx_pbase = id.base;
2887    tx_vbase = (genvaddr_t)tx_bufs;
2888
2889    // Initialize TX packet descriptors
2890    for(int i = 0; i < MAX_PACKETS; i++) {
2891        /* tx_packets[i].payload = tx_bufs[i]; */
2892        tx_packets[i].payload = tx_bufs + (i * PACKET_SIZE);
2893    }
2894
2895    if (!vtd_coherency) {// For the UDP echo server
2896        sys_debug_flush_cache();
2897    }
2898
2899    // Determine my static IP address
2900    for(int i = 0; i < sizeof(ip_config) / sizeof(struct mac2ip); i++) {
2901        struct mac2ip *e = &ip_config[i];
2902        if(!memcmp(mac, e->mac, ETHARP_HWADDR_LEN)) {
2903            arranet_myip = htonl(e->ip);
2904            memcpy(arranet_mymac, e->mac, ETHARP_HWADDR_LEN);
2905            break;
2906        }
2907    }
2908
2909    if(arranet_myip == 0) {
2910        USER_PANIC("Arranet: No static IP config for this MAC address!\n");
2911    }
2912
2913    /***** Initialize IP/Ethernet packet header template *****/
2914    {
2915        struct pkt_ip_headers *p = &packet_ip_header;
2916
2917        // Initialize Ethernet header
2918        memcpy(&p->eth.src, mac, ETHARP_HWADDR_LEN);
2919        p->eth.type = htons(ETHTYPE_IP);
2920
2921        // Initialize IP header
2922        p->ip._v_hl = 69;
2923        p->ip._tos = 0;
2924        p->ip._id = htons(3);
2925        p->ip._offset = 0;
2926        p->ip._ttl = 0xff;
2927        p->ip._proto = 0;
2928        p->ip._chksum = 0;
2929        p->ip.src.addr = arranet_myip;
2930    }
2931
2932    /***** Initialize UDP/IP/Ethernet packet header template *****/
2933    {
2934        struct pkt_udp_headers *p = &packet_udp_header;
2935
2936        // Initialize Ethernet header
2937        memcpy(&p->eth.src, mac, ETHARP_HWADDR_LEN);
2938        p->eth.type = htons(ETHTYPE_IP);
2939
2940        // Initialize IP header
2941        p->ip._v_hl = 69;
2942        p->ip._tos = 0;
2943        p->ip._id = htons(3);
2944        p->ip._offset = 0;
2945        p->ip._ttl = 0xff;
2946        p->ip._proto = IP_PROTO_UDP;
2947        p->ip._chksum = 0;
2948        p->ip.src.addr = arranet_myip;
2949
2950        // Initialize UDP header
2951        p->udp.chksum = 0;
2952    }
2953
2954    /***** Initialize TCP/IP/Ethernet packet header template *****/
2955    {
2956        struct pkt_tcp_headers *p = &packet_tcp_header;
2957
2958        // Initialize Ethernet header
2959        memcpy(&p->eth.src, mac, ETHARP_HWADDR_LEN);
2960        p->eth.type = htons(ETHTYPE_IP);
2961
2962        // Initialize IP header
2963        p->ip._v_hl = 69;
2964        p->ip._tos = 0;
2965        p->ip._id = htons(3);
2966        p->ip._offset = 0;
2967        p->ip._ttl = 0xff;
2968        p->ip._proto = IP_PROTO_TCP;
2969        p->ip._chksum = 0;
2970        p->ip.src.addr = arranet_myip;
2971
2972        // Initialize TCP header
2973        p->tcp.chksum = 0;
2974        p->tcp.wnd = 65535;
2975    }
2976
2977    // Initialize queue of free TCP ports
2978    for(u16_t i = 0; i <= TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START; i++) {
2979        free_tcp_ports[i] = htons(TCP_LOCAL_PORT_RANGE_START + i);
2980    }
2981
2982    // Initialize queue of free sockets
2983    for(int i = 0; i < MAX_FD; i++) {
2984        free_sockets_queue[i] = &sockets[i];
2985        sockets[i].fd = i;
2986    }
2987
2988    /***** Eat driver-specific options *****/
2989    static char *new_argv[ARG_MAX];
2990    int new_argc = 0;
2991    for(int i = 0; i < *argc; i++) {
2992        int j;
2993
2994        for(j = 0; eat_opts[j] != NULL; j++) {
2995            if(!strncmp((*argv)[i], eat_opts[j], strlen(eat_opts[j]) - 1)) {
2996                // Option matches -- delete!
2997                break;
2998            }
2999        }
3000
3001        if(eat_opts[j] == NULL) {
3002            // Option doesn't match -- keep!
3003            new_argv[new_argc++] = (*argv)[i];
3004        }
3005    }
3006
3007    *argc = new_argc;
3008    *argv = new_argv;
3009}
3010