1/*
2 * Copyright (c) 2014 ETH Zurich.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
8 */
9
10#include <stdio.h>
11#include <stddef.h>
12
13#include <barrelfish/barrelfish.h>
14#include <barrelfish/nameservice_client.h>
15#include <barrelfish/threads.h>
16#include <barrelfish/waitset_chan.h>
17#include <bulk_transfer/bulk_transfer.h>
18#include <pci/pci.h>
19
20#include <if/net_ports_defs.h>
21#include <if/net_ports_defs.h>
22#include <if/net_ARP_defs.h>
23#include <if/net_ARP_defs.h>
24#include <if/e10k_defs.h>
25
26#include <dev/e10k_dev.h>
27#include <dev/e10k_q_dev.h>
28
29#include "bulk_net_backend.h"
30
31#define E10K_MNG_SUF "_e10kmng"
32
33#define ETHHDR_LEN 14
34#define IPHDR_LEN 20
35
36
37#if BULK_NET_ENABLE_DEBUG && BULK_NET_ENABLE_DEBUG_E10K
38#define DEBUG(x...) debug_printf("e10k: " x)
39#if BULK_NET_ENABLE_TRACE
40#define BULK_NET_ 1
41#else
42#endif
43#else
44#define BULK_NET_ENABLE_E10K_TRACE
45#define DEBUG(x...)
46#endif
47
48#define USE_INTERRUPTS 0
49#define USE_WSPOLL 1
50
/** Deferred-event state for one received packet (drawn from rx_event_alloc). */
struct e10k_rx_event {
    struct bulk_e10k       *bu;  // channel the packet arrived on
    struct bulk_net_msgdesc msg; // descriptor chain of the received packet
    struct event_queue_node eqn; // node used when the event is queued
};
56
/** Deferred-event state for one completed transmit (drawn from tx_event_alloc). */
struct e10k_tx_event {
    struct bulk_e10k       *bu;  // channel the buffer was sent on
    void                   *op;  // user opaque handed back via the transmitted callback
    struct event_queue_node eqn; // node used when the event is queued
};
62
63
64static struct net_ports_binding *net_ports_rpc;
65static bool net_ports_connected = false;
66
67static struct net_ARP_binding *net_arp_rpc;
68static bool net_arp_connected = false;
69
70static errval_t update_rxtail(void *opaque, size_t tail);
71static errval_t update_txtail(void *opaque, size_t tail);
72#if USE_INTERRUPTS
73static void interrupt_handler(void *arg);
74#endif
75#if USE_WSPOLL
76void bulk_e10k_poll(struct waitset_chanstate *chan);
77#endif
78
79/* Declarations for e10k flounder interface */
80static void idc_request_device_info(struct bulk_e10k *bu);
81static void idc_register_queue_memory(struct bulk_e10k *bu);
82static void idc_queue_init_data(struct e10k_binding *b, struct capref registers,
83        uint64_t macaddr);
84static void idc_queue_memory_registered(struct e10k_binding *b);
85static void idc_write_queue_tails(struct e10k_binding *b);
86
/** Receive vtable for the e10k driver binding (installed in bind_cb). */
static struct e10k_rx_vtbl rx_vtbl = {
    .queue_init_data = idc_queue_init_data,
    .queue_memory_registered = idc_queue_memory_registered,
    .write_queue_tails = idc_write_queue_tails,
};
92
93
94/*****************************************************************************/
95/* Port manager client */
96
97/** Bind specific port to queue */
98static errval_t port_bind(uint64_t b_rx, uint64_t b_tx, uint64_t q,
99        uint16_t port)
100{
101    errval_t err, msgerr;
102
103    err = net_ports_rpc->rpc_tx_vtbl.bind_port(net_ports_rpc, net_ports_PORT_UDP, port,
104            b_rx, b_tx, 0, q, &msgerr);
105    if (err_is_fail(err)) {
106        return err;
107    }
108
109    return msgerr;
110}
111
112/** Get any free port and bind it to the queue */
113static errval_t port_get(uint64_t b_rx, uint64_t b_tx, uint64_t q,
114        uint16_t *port)
115{
116    errval_t err, msgerr;
117
118    err = net_ports_rpc->rpc_tx_vtbl.get_port(net_ports_rpc, net_ports_PORT_UDP,
119            b_rx, b_tx, 0, q, &msgerr, port);
120    if (err_is_fail(err)) {
121        return err;
122    }
123
124    return msgerr;
125}
126
/** Bind callback for the ports service: store binding and signal readiness. */
static void p_bind_cb(void *st, errval_t err, struct net_ports_binding *b)
{
    assert(err_is_ok(err));
    net_ports_rpc = b;
    net_ports_rpc_client_init(net_ports_rpc);
    // Must be set last: bind_ports() spins on this flag
    net_ports_connected = true;
}
134
/** Bind to ports service (currently blocking) */
static void bind_ports(struct waitset *ws)
{
    errval_t err;
    iref_t iref;

    DEBUG("bind_ports()\n");
    err = nameservice_blocking_lookup("e10k_PORTS_MNG", &iref);
    assert(err_is_ok(err));
    DEBUG("resolved\n");

    err = net_ports_bind(iref, p_bind_cb, NULL, ws, IDC_BIND_FLAGS_DEFAULT);
    assert(err_is_ok(err));
    DEBUG("binding initiated\n");

    // Dispatch both waitsets until p_bind_cb() flags the connection as up
    while (!net_ports_connected) {
        event_dispatch_non_block(ws);
        event_dispatch_non_block(get_default_waitset());
    }
    DEBUG("bound_ports\n");
}
156
157
158/*****************************************************************************/
159/* ARP service client */
160
161/** Get information about the local TCP/IP configuration*/
162static errval_t arp_ip_info(uint32_t *ip, uint32_t *gw, uint32_t *mask)
163{
164    errval_t err, msgerr;
165
166    err = net_arp_rpc->rpc_tx_vtbl.ip_info(net_arp_rpc, 0, &msgerr, ip, gw, mask);
167    if (err_is_fail(err)) {
168        return err;
169    }
170    return msgerr;
171}
172
173/** Do an ARP lookup of an ip address */
174static errval_t arp_lookup(uint32_t ip, uint64_t *mac)
175{
176    errval_t err, msgerr;
177
178    err = net_arp_rpc->rpc_tx_vtbl.ARP_lookup(net_arp_rpc, ip, 0, true, &msgerr, mac);
179    if (err_is_fail(err)) {
180        return err;
181    }
182    return msgerr;
183}
184
/** Bind callback for the ARP service: store binding and signal readiness. */
static void a_bind_cb(void *st, errval_t err, struct net_ARP_binding *b)
{
    assert(err_is_ok(err));
    net_arp_rpc = b;
    net_ARP_rpc_client_init(net_arp_rpc);
    // Must be set last: bind_arp() spins on this flag
    net_arp_connected = true;
}
192
/** Bind to ARP service (currently blocking) */
static void bind_arp(struct waitset *ws)
{
    errval_t err;
    iref_t iref;

    DEBUG("bind_arp()\n");
    err = nameservice_blocking_lookup("e10k_ARP", &iref);
    assert(err_is_ok(err));
    DEBUG("resolved\n");

    err = net_ARP_bind(iref, a_bind_cb, NULL, ws, IDC_BIND_FLAGS_DEFAULT);
    assert(err_is_ok(err));
    DEBUG("binding initiated\n");

    // Dispatch both waitsets until a_bind_cb() flags the connection as up
    while (!net_arp_connected) {
        event_dispatch_non_block(ws);
        event_dispatch_non_block(get_default_waitset());
    }
    DEBUG("bound_arp\n");
}
214
215
216/******************************************************************************/
217/* e10k card driver interface */
218
/** e10k interface: callback for a successful binding */
static void bind_cb(void *st, errval_t err, struct e10k_binding *b)
{
    DEBUG("bind_cb()\n");
    struct bulk_e10k *bu = st;

    assert(err_is_ok(err));

    // Install receive handlers and cross-link binding and channel state
    b->rx_vtbl = rx_vtbl;
    b->st = bu;
    bu->binding = b;

    // Kick off the initialization handshake with the driver
    idc_request_device_info(bu);
}
233
234
/** e10k interface: Send request for device information */
static void idc_request_device_info(struct bulk_e10k *bu)
{
    errval_t err;

    DEBUG("idc_request_device_info()\n");

    // Driver replies via the queue_init_data handler
    err = e10k_request_device_info__tx(bu->binding, NOP_CONT);
    assert(err_is_ok(err));

}
246
/** e10k interface: Register memory for descriptor rings */
static void idc_register_queue_memory(struct bulk_e10k *bu)
{
    // NOTE(review): the actual register_queue_memory send below is commented
    // out, so this function currently does nothing but assert on SYS_ERR_OK.
    // Presumably work-in-progress or disabled intentionally -- confirm,
    // because bu->ready is only set once the driver acknowledges
    // registration (idc_queue_memory_registered).
    errval_t r = SYS_ERR_OK;
    DEBUG("idc_register_queue_memory()\n");

    /* r = e10k_register_queue_memory__tx(bu->binding, NOP_CONT, bu->qi, */
    /*     bu->txframe, bu->txhwbframe, bu->rxframe, bu->buffer_size, E10K_HDRSZ, */
    /*     bu->int_vector, bu->int_core, USE_INTERRUPTS, false); */
    assert(err_is_ok(r));
}
258
259/** e10k interface: Callback for request device info */
260static void idc_queue_init_data(struct e10k_binding *b, struct capref registers,
261        uint64_t macaddr)
262{
263    DEBUG("idc_queue_init_data()\n");
264
265    errval_t err;
266    struct bulk_e10k *bu = b->st;
267    struct frame_identity fid = { .base = 0, .bytes = 0 };
268    void *virt, *rx, *tx, *txhwb;
269    uint8_t core;
270    struct e10k_queue_ops ops = {
271        .update_txtail = update_txtail,
272        .update_rxtail = update_rxtail
273    };
274
275    bu->mac = macaddr;
276
277    // Map registers
278    frame_identify(registers, &fid);
279    err = vspace_map_one_frame_attr(&virt, fid.bytes, registers,
280            VREGION_FLAGS_READ_WRITE_NOCACHE, NULL, NULL);
281    assert(err_is_ok(err));
282
283    // Initialize mackerel device (must only be used for queue index register)
284    e10k_initialize(&bu->d, virt);
285
286    // Allocate and initialize memory for queues
287    err = allocmap_frame(bu->ring_size * E10K_DESCSZ, &rx, NULL, &bu->rxframe);
288    assert(err_is_ok(err));
289    err = allocmap_frame(bu->ring_size * E10K_DESCSZ, &tx, NULL, &bu->txframe);
290    assert(err_is_ok(err));
291    err = allocmap_frame(0x1000, &txhwb, NULL, &bu->txhwbframe);
292    assert(err_is_ok(err));
293
294    bu->q = e10k_queue_init(tx, bu->ring_size, txhwb, rx, bu->ring_size,
295                            &ops, bu);
296
297    // Setup interrupt
298#if USE_INTERRUPTS
299    err = pci_setup_inthandler(interrupt_handler, bu, &bu->int_vector);
300    assert(err_is_ok(err));
301    bu->int_core = disp_get_core_id();
302
303#endif
304
305    DEBUG("idc_queue_init_data: done\n");
306
307    // Register ring memory with driver
308    core = disp_get_core_id();
309    idc_register_queue_memory(bu);
310}
311
/** e10k interface: Callback for register queue memory */
static void idc_queue_memory_registered(struct e10k_binding *b)
{
    struct bulk_e10k *bu = b->st;
    DEBUG("idc_queue_memory_registered()\n");

    // Unblocks the wait loop in bulk_e10k_init()
    bu->ready = true;
}
320
321/**
322 * e10k interface: Callback for writing out queue tails (needed in case of card
323 * hangs)
324 */
325static void idc_write_queue_tails(struct e10k_binding *b)
326{
327    struct bulk_e10k *bu = b->st;
328    DEBUG("idc_write_queue_tails()\n");
329    e10k_queue_bump_rxtail(bu->q);
330    e10k_queue_bump_txtail(bu->q);
331}
332
333
334/*****************************************************************************/
335/* e10k queue management */
336
/** Deliver a received packet to the user callback and recycle the event. */
static void recv_event(void *arg)
{
    DEBUG("recv_event\n");
    struct e10k_rx_event *rxe = arg;
    rxe->bu->received(rxe->bu, &rxe->msg);
    // Return the event struct to the channel's free stack
    stack_alloc_free(&rxe->bu->rx_event_alloc, rxe);
}
344
345/** Try to process one packet in the receive queue */
346static bool recv_one(struct bulk_e10k *bu)
347{
348    void *op;
349    size_t len, hdrlen, i;
350    int last = 0, res;
351    uint64_t flags = 0;
352    struct e10k_rx_event *rxe = NULL; // Fix compile bug -- jb
353
354
355    i = 0;
356    do {
357        res = e10k_queue_get_rxbuf(bu->q, &op, &hdrlen, &len, &last, &flags);
358        if (res == 0) {
359            if (i == 0) {
360                rxe = stack_alloc_alloc(&bu->rx_event_alloc);
361                 assert(rxe != NULL); // should never happen
362            }
363            DEBUG("    Received part[%"PRId64"] of packet op=%p hl=%"PRIx64" l=%"
364                    PRIx64" f=%"PRIx64"\n", i, op, hdrlen, len, flags);
365        }
366        if (i == 0 && res != 0) {
367            return false;
368        } else if (res != 0) {
369            continue;
370        } else if ((i + !!hdrlen) >= BULK_NET_DESCLEN) {
371            USER_PANIC("Buffer chain longer than supported");
372        }
373
374        if (hdrlen > 0) {
375            rxe->msg.parts[i].size = hdrlen;
376            rxe->msg.parts[i].opaque = op;
377            i++;
378        }
379
380        rxe->msg.parts[i].size = len;
381        rxe->msg.parts[i].opaque = op;
382
383        i++;
384    } while (last != 1);
385
386    if (i < BULK_NET_DESCLEN) {
387        memset(&rxe->msg.parts[i], 0, sizeof(rxe->msg.parts[i]));
388    }
389
390#if !USE_INTERRUPTS && USE_WSPOLL
391    recv_event(rxe);
392#else
393    event_queue_add(&bu->event_queue, &rxe->eqn,
394        MKCLOSURE(recv_event, rxe));
395#endif
396
397    return true;
398}
399
/** Deliver a transmit-done notification to the user callback and recycle the event. */
static void tx_event(void *arg)
{
    DEBUG("tx_event\n");
    struct e10k_tx_event *txe = arg;
    txe->bu->transmitted(txe->bu, txe->op);
    // Return the event struct to the channel's free stack
    stack_alloc_free(&txe->bu->tx_event_alloc, txe);

}
408
/** Check the tx queues for transmits that have finished.
 *  @return true if a completed transmit was found and its event dispatched.
 */
static bool check_tx(struct bulk_e10k *bu)
{
    void *op = NULL;
    bool had = false;
    struct e10k_tx_event *txe;

#if 0
    if (e10k_tdt_rd(&bu->d, bu->qi) != e10k_tdh_rd(&bu->d, bu->qi)) {
        DEBUG("Nonempty: %"PRIx32" %"PRIx32"\n", e10k_tdt_rd(&bu->d,
                    bu->qi), e10k_tdh_rd(&bu->d, bu->qi));
    }
#endif
    if (e10k_queue_get_txbuf(bu->q, &op) == 0) {
        DEBUG("e10k packet sent\n");
        txe = stack_alloc_alloc(&bu->tx_event_alloc);
        assert(txe != NULL); // should never happen
        txe->op = op;
#if !USE_INTERRUPTS && USE_WSPOLL
        // Pure polling mode: deliver directly
        tx_event(txe);
#else
        // Defer delivery to the event queue
        event_queue_add(&bu->event_queue, &txe->eqn,
            MKCLOSURE(tx_event, txe));
#endif
        had = true;
    }
    return had;
}
437
438#if USE_INTERRUPTS
/** Interrupt handler for RX and TX events */
static void interrupt_handler(void *arg)
{
    struct bulk_e10k *bu = arg;
    DEBUG("Interrupt!\n");
    // Drain both queues completely before returning from the interrupt
    while (recv_one(bu));
    while (check_tx(bu));
}
447#else
448#if USE_WSPOLL
449
450static inline struct bulk_e10k *wscs_to_e10k(struct waitset_chanstate *chan)
451{
452    return (struct bulk_e10k *)
453       ((uintptr_t) chan - offsetof(struct bulk_e10k, wscs));
454}
455
/** Waitset event handler: drain RX and TX queues, then re-register for polling. */
static void ws_event(void *arg)
{
    struct bulk_e10k *bu = arg;
    bool found, cur;
    // Keep alternating between RX and TX until neither makes progress
    do {
        found = false;
        do {
            cur = recv_one(bu);
            found = found || cur;
        } while (cur);
        do {
            cur = check_tx(bu);
            found = found || cur;
        } while (cur);
    } while (found);

    // Re-arm: polled channels are deregistered when they fire
    waitset_chan_register_polled(bu->waitset, &bu->wscs,
                    MKCLOSURE(ws_event, bu));

}
476
/** Poll hook called by the waitset: triggers the channel if either queue
 *  has work pending.
 *  NOTE(review): assumes the *poll functions return 0 when there is work
 *  pending and nonzero when idle -- confirm against the e10k_queue
 *  implementation. */
void bulk_e10k_poll(struct waitset_chanstate *chan)
{
    struct bulk_e10k *bu = wscs_to_e10k(chan);
    // Check TX queue first, since it is cheaper
    if (e10k_queue_get_txpoll(bu->q) != 0 &&
        e10k_queue_rxpoll(bu->q) != 0)
    {
        // Neither queue reported work; nothing to do
        return;
    }

    waitset_chan_trigger(chan);
}
489
490#else
491
/** Thread polling rx and tx queues */
static int recv_thread(void *arg)
{
    struct bulk_e10k *bu = arg;
    DEBUG("Start receiving thread...\n");
    bool found;
    // Busy-poll both queues forever, yielding when idle
    while (1) {
        found = check_tx(bu);
        found = recv_one(bu) || found;
        if (!found) {
            thread_yield();
        }
    }
    return 0;
}
507#endif
508#endif
509
510
/** Callback for queue manager (writes tx tail index) */
static errval_t update_txtail(void *opaque, size_t tail)
{
    struct bulk_e10k *bu = opaque;
    // Direct mackerel write of the TX descriptor tail register for our queue
    e10k_tdt_wr(&bu->d, bu->qi, tail);
    return SYS_ERR_OK;
}
518
/** Callback for queue manager (writes rx tail index) */
static errval_t update_rxtail(void *opaque, size_t tail)
{
    struct bulk_e10k *bu = opaque;
    // Direct mackerel write of the RX descriptor tail register for our queue
    e10k_rdt_1_wr(&bu->d, bu->qi, tail);
    return SYS_ERR_OK;
}
526
527
528/*****************************************************************************/
529/* Public interface */
530
531/**
532 * Initialize directly mapped RX/TX queue pair with e10k NIC.
533 *
534 * @param bu          Channel struct
535 * @param ws          Waitset
536 * @param card        Card name
537 * @param queue       Queue ID to use
538 * @param buffer_size Size of receive buffers in bytes
539 * @param ring_size   Number of descriptors in the RX/TX rings
540 * @param received    Callback for a received packet
541 * @param transmitted Callback for a transmitted packet
542 */
543errval_t bulk_e10k_init(struct bulk_e10k *bu,
544                        struct waitset *ws,
545                        const char *card,
546                        uint8_t queue,
547                        size_t buffer_size,
548                        size_t ring_size,
549                        void (*received)(struct bulk_e10k *,
550                                         struct bulk_net_msgdesc *),
551                        void (*transmitted)(struct bulk_e10k *, void *))
552{
553    errval_t err;
554    char name[strlen(card) + strlen(E10K_MNG_SUF) + 1];
555    iref_t iref;
556    struct e10k_rx_event *rxe;
557    struct e10k_tx_event *txe;
558    size_t i;
559
560
561    bu->qi = queue;
562    bu->ready = false;
563    bu->received = received;
564    bu->transmitted = transmitted;
565    bu->buffer_size = buffer_size;
566    bu->ring_size = ring_size;
567    bu->waitset = ws;
568
569    // Allocate events
570    stack_alloc_init(&bu->rx_event_alloc, ring_size);
571    stack_alloc_init(&bu->tx_event_alloc, ring_size);
572    rxe = calloc(ring_size, sizeof(*rxe));
573    txe = calloc(ring_size, sizeof(*txe));
574    for (i = 0; i < ring_size; i++) {
575        rxe[i].bu = bu;
576        txe[i].bu = bu;
577        stack_alloc_free(&bu->rx_event_alloc, rxe + i);
578        stack_alloc_free(&bu->tx_event_alloc, txe + i);
579    }
580
581    // Connect to port management service
582    bind_ports(ws);
583    bind_arp(ws);
584
585    // Bind to e10k card driver
586    strcpy(name, card);
587    strcat(name, E10K_MNG_SUF);
588    err = nameservice_blocking_lookup(name, &iref);
589    assert(err_is_ok(err));
590
591    DEBUG("Start binding\n");
592    err = e10k_bind(iref, bind_cb, bu, ws, IDC_BIND_FLAGS_DEFAULT);
593    assert(err_is_ok(err));
594
595    while (!bu->ready) {
596        event_dispatch_non_block(ws);
597        event_dispatch_non_block(get_default_waitset());
598    }
599
600#if USE_INTERRUPTS || !USE_WSPOLL
601    event_queue_init(&bu->event_queue, ws, EVENT_QUEUE_CONTINUOUS);
602#endif
603#if !USE_INTERRUPTS
604#if USE_WSPOLL
605    waitset_chanstate_init(&bu->wscs, CHANTYPE_BULK_E10K);
606    waitset_chan_register_polled(ws, &bu->wscs,
607            MKCLOSURE(ws_event, bu));
608#else
609    thread_create(recv_thread, bu);
610#endif
611#endif
612
613    return SYS_ERR_OK;
614}
615
616/**
617 * Add a buffer to the receive queue.
618 *
619 * @param bu     Channel struct
620 * @param phys   Physical address of buffer
621 * @param header Physical address of header buffer (needs E10K_HDRSZ bytes)
622 * @param opaque User-Data for this buffer, will be returned when it is used in
623 *               a received packet.
624 */
625errval_t bulk_e10k_rx_add(struct bulk_e10k *bu, uint64_t phys, uint64_t header,
626                          void *opaque)
627{
628    DEBUG("bulk_e10k_rx_add(transfer=%p, phy=%"PRIx64",header=%"PRIx64",opaque=%p)\n",
629            bu, phys, header, opaque);
630    int r = e10k_queue_add_rxbuf(bu->q, phys, header, opaque);
631    assert(r == 0);
632    e10k_queue_bump_rxtail(bu->q);
633    return SYS_ERR_OK;
634}
635
636/**
637 * Send out a packet.
638 *
639 * @param bu   Channel struct
640 * @param decs Descriptor for buffer chain to transmit
641 */
642errval_t bulk_e10k_send(struct bulk_e10k *bu, struct bulk_net_msgdesc *desc)
643{
644    size_t totallen = 0;
645    size_t cnt = 0;
646    size_t i;
647    for (i = 0; i < BULK_NET_DESCLEN; i++) {
648        if (desc->parts[i].size == 0) {
649            break;
650        }
651        cnt++;
652        totallen += desc->parts[i].size;
653    }
654    DEBUG("bulk_e10k_send(len=%"PRIx64")\n", totallen);
655
656    e10k_queue_add_txcontext(bu->q, 0, ETHHDR_LEN, IPHDR_LEN, 0, 0);
657    e10k_queue_add_txbuf_ctx(bu->q, desc->parts[0].phys,
658        desc->parts[0].size, desc->parts[0].opaque, 1, cnt == 1,
659        totallen, 0, true, false);
660
661    for (i = 1; i < cnt; i++) {
662        e10k_queue_add_txbuf(bu->q, desc->parts[i].phys,
663                desc->parts[i].size, desc->parts[i].opaque, 0, i == cnt - 1,
664                totallen);
665    }
666    e10k_queue_bump_txtail(bu->q);
667    DEBUG("bulk_e10k_send_done\n");
668    return SYS_ERR_OK;
669}
670
671/**
672 * Steer a specific UDP port to this queue.
673 *
674 * @param bu   Channel struct
675 * @param port Port to allocate (in host byte order)
676 */
677errval_t bulk_e10k_port_add(struct bulk_e10k *bu, uint16_t port)
678{
679    errval_t err;
680
681    // Register port
682    err = port_bind(0, 0, bu->qi, port);
683    assert(err_is_ok(err));
684    DEBUG("Port registered\n");
685
686    return SYS_ERR_OK;
687}
688
689/**
690 * Allocate an unused UDP port and steer it to this queue.
691 *
692 * @param bu   Channel struct
693 * @param port Pointer to variable where port number will be stored (host byte
694 *             order)
695 */
696errval_t bulk_e10k_port_alloc(struct bulk_e10k *bu, uint16_t *port)
697{
698    return port_get(0, 0, bu->qi, port);
699}
700
701/**
702 * Get IP address configured for this interface.
703 *
704 * @param bu Channel struct
705 * @param ip Pointer to variable where IP will be stored (host byte order)
706 */
707errval_t bulk_e10k_ip_info(struct bulk_e10k *bu, uint32_t *ip)
708{
709    errval_t err;
710    uint32_t gw, mask;
711    err = arp_ip_info(ip, &gw, &mask);
712    *ip = ntohl(*ip);
713    return err;
714}
715
716/**
717 * Do an ARP lookup on this interface
718 *
719 * @param bu  Channnel struct
720 * @param ip  IP address to resolve (in host byte order)
721 * @param mac Pointer to variable where MAC address will be stored
722 */
723errval_t bulk_e10k_arp_lookup(struct bulk_e10k *bu, uint32_t ip, uint64_t *mac)
724{
725    return arp_lookup(htonl(ip), mac);
726}
727