1/**
2 * \file
3 * \brief Driver for booting the Xeon Phi Coprocessor card on a Barrelfish Host
4 */
5
6/*
7 * Copyright (c) 2014 ETH Zurich.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#include <stdio.h>
16#include <string.h>
17#include <barrelfish/barrelfish.h>
18#include <xeon_phi/xeon_phi.h>
19
20#include <if/xeon_phi_driver_defs.h>
21
22#include "xeon_phi_internal.h"
23#include "service.h"
24#include "interphi.h"
25#include "dma_service.h"
26#include "smpt.h"
27
28static uint32_t is_exported;
29
30static iref_t svc_iref;
31
32errval_t bootstrap_errors[XEON_PHI_NUM_MAX];
33
34/*
35 * ---------------------------------------------------------------------------
36 * Intra Xeon Phi Driver Communication bootstrap
37 */
38
39struct msg_open_st
40{
41    struct xeon_phi_driver_binding *b;
42    uint64_t base;
43    uint8_t bits;
44    errval_t err;
45};
46
47static void bootstrap_response_tx(void *a)
48{
49    errval_t err;
50
51    struct msg_open_st *st = a;
52
53    struct event_closure txcont = MKCONT(free, a);
54
55    err = xeon_phi_driver_bootstrap_response__tx(st->b, txcont, st->err);
56    if (err_is_fail(err)) {
57        if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
58            struct waitset *ws = get_default_waitset();
59            txcont = MKCONT(bootstrap_response_tx, a);
60            err = st->b->register_send(st->b, ws, txcont);
61            if (err_is_fail(err)) {
62                XSERVICE_DEBUG("Could not send!");
63            }
64        }
65    }
66}
67
68
69
70static void bootstrap_response_rx(struct xeon_phi_driver_binding *b,
71                                  errval_t msgerr)
72{
73    struct xnode *node = b->st;
74
75    XSERVICE_DEBUG("Xeon Phi Node %u recv bootstrap_response_rx: %s\n",
76                   node->local->id, err_getstring(msgerr));
77
78    node->bootstrap_done = 0x1;
79    node->err = msgerr;
80}
81
82static void bootstrap_call_rx(struct xeon_phi_driver_binding *b,
83                              uint64_t base,
84                              uint8_t bits)
85{
86    errval_t err;
87
88    struct xnode *node = b->st;
89
90    XSERVICE_DEBUG("Xeon Phi Node %u bootstrap_call_rx: [0x%016lx] from %u\n",
91                   node->local->id, base, node->id);
92
93    lpaddr_t offset = ((node->apt_base >> 32) - ((node->apt_base >> 34) << 2)) << 32;
94
95    err = interphi_bootstrap(node->local, base, bits, offset, node->id, 0x1);
96
97    struct msg_open_st *st = malloc(sizeof(*st));
98    if (st == NULL) {
99        USER_PANIC("could not allocate state");
100    }
101
102    st->err = err;
103    st->b = node->binding;
104
105    bootstrap_response_tx(st);
106}
107
108static void bootstrap_call_tx(void *a)
109{
110    errval_t err;
111
112    struct msg_open_st *st = a;
113
114    struct event_closure txcont = MKCONT(free, a);
115
116    err = xeon_phi_driver_bootstrap_call__tx(st->b, txcont, st->base, st->bits);
117    if (err_is_fail(err)) {
118        if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
119            struct waitset *ws = get_default_waitset();
120            txcont = MKCONT(bootstrap_call_tx, a);
121            err = st->b->register_send(st->b, ws, txcont);
122            if (err_is_fail(err)) {
123                XSERVICE_DEBUG("Could not send!");
124            }
125        }
126    }
127}
128
129
130
131/**
132 * \brief registers an intra card communication frame
133 *
134 * \param phi      the local xeon phi card
135 * \param xphi_id  target xeon phi id
136 */
137errval_t service_bootstrap(struct xeon_phi *phi,
138                           uint8_t xphi_id,
139                           struct capref frame)
140{
141    assert(xphi_id < XEON_PHI_NUM_MAX);
142
143    if (phi->id == xphi_id) {
144        XSERVICE_DEBUG("The IDs were the same. Skipping.\n");
145        return SYS_ERR_OK;
146    }
147
148    XSERVICE_DEBUG("sending bootstrap to node {xid:%u}.\n", xphi_id);
149
150    struct xnode *node = &phi->topology[xphi_id];
151
152    assert(node->bootstrap_done == 0);
153
154    errval_t err;
155    struct frame_identity id;
156    err = invoke_frame_identify(frame, &id);
157    if (err_is_fail(err)) {
158        return err;
159    }
160
161    if (node->state != XNODE_STATE_READY) {
162        return -1;  // TODO: error code
163    }
164
165    struct msg_open_st *st = malloc(sizeof(struct msg_open_st));
166    if (st == NULL) {
167        return LIB_ERR_MALLOC_FAIL;
168    }
169
170    node->err = SYS_ERR_OK;
171
172    st->b = node->binding;
173    st->base = id.base;
174    assert((1UL << log2ceil(id.bytes)) == id.bytes);
175    st->bits = log2ceil(id.bytes);
176
177    bootstrap_call_tx(st);
178
179    XSERVICE_DEBUG("waiting for bootstrap done:%u.\n", xphi_id);
180
181    while(!node->bootstrap_done) {
182        xeon_phi_event_poll(0x1);
183    }
184
185    return node->err;
186}
187
188/*
189 * ---------------------------------------------------------------------------
 * Intra Xeon Phi Driver Communication Registration
191 */
192
193static void register_response_send(void *a)
194{
195    errval_t err;
196
197    struct xnode *topology = a;
198
199    struct event_closure txcont = MKCONT(NULL, a);
200
201    if (topology->state == XNODE_STATE_READY) {
202        err = SYS_ERR_OK;
203    } else {
204        err = -1;  // TODO> ERROR NUMBEr
205    }
206
207    struct xeon_phi *phi = topology->local;
208
209    err = xeon_phi_driver_register__response__tx(topology->binding, txcont, err,
210                                                phi->apt.pbase, phi->apt.length);
211    if (err_is_fail(err)) {
212        if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
213            struct waitset *ws = get_default_waitset();
214            txcont = MKCONT(register_response_send, a);
215            err = topology->binding->register_send(topology->binding, ws, txcont);
216            if (err_is_fail(err)) {
217                topology->state = XNODE_STATE_FAILURE;
218            }
219        }
220    }
221}
222
223/**
224 *
225 */
226static void register_call_recv(struct xeon_phi_driver_binding *_binding,
227                               uint8_t id,
228                               uint64_t other_apt_base,
229                               uint64_t other_apt_size)
230{
231    struct xeon_phi *phi = _binding->st;
232
233    interphi_wait_for_client(phi);
234
235    assert(id < XEON_PHI_NUM_MAX);
236    phi->topology[id].binding = _binding;
237    phi->topology[id].state = XNODE_STATE_READY;
238    phi->topology[id].apt_base = other_apt_base;
239    phi->topology[id].apt_size = other_apt_size;
240    phi->connected++;
241
242    if (!smpt_set_coprocessor_address(phi, id, other_apt_base)) {
243        assert(!"Setting page table entry failed");  // TODO: proper error handling
244    };
245
246    XSERVICE_DEBUG("Xeon Phi Node %u: New register call: id=0x%x @ [0x%016lx]\n",
247                   phi->id, id, other_apt_base);
248
249    _binding->st = &phi->topology[id];
250
251    register_response_send(&phi->topology[id]);
252}
253
254/**
255 *
256 */
257static void register_response_recv(struct xeon_phi_driver_binding *_binding,
258                                   xeon_phi_driver_errval_t msgerr,
259                                   uint64_t other_apt_base,
260                                   uint64_t other_apt_size)
261{
262    assert(((struct xnode * )(_binding->st))->binding == _binding);
263
264    struct xnode *topology = _binding->st;
265
266    if (err_is_fail(msgerr)) {
267        topology->state = XNODE_STATE_FAILURE;
268        XSERVICE_DEBUG("Xeon Phi node %u: Registering FAILED\n",
269                       topology->local->id);
270    } else {
271        topology->local->connected++;
272        topology->state = XNODE_STATE_READY;
273        topology->apt_base = other_apt_base;
274        topology->apt_size = other_apt_size;
275        XSERVICE_DEBUG("Xeon Phi node %u: Registering response. Node %u @ 0x%016lx\n",
276                       topology->local->id, topology->id, topology->apt_base);
277
278        smpt_set_coprocessor_address(topology->local, topology->id, other_apt_base);
279    }
280}
281
/**
 * \brief send continuation of the register call; there is no work to do
 *
 * \param a  the struct xnode the call was sent for (unused)
 */
static void register_call_sent_cb(void *a)
{
    (void)a;
}
286
287static void register_call_send(void *a)
288{
289    errval_t err;
290
291    struct xnode *topology = a;
292
293    struct xeon_phi *phi = topology->local;
294
295    struct event_closure txcont = MKCONT(register_call_sent_cb, a);
296
297    topology->state = XNODE_STATE_REGISTERING;
298
299    err = xeon_phi_driver_register__call__tx(topology->binding, txcont, phi->id,
300                                            phi->apt.pbase, phi->apt.length);
301    if (err_is_fail(err)) {
302        if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
303            struct waitset *ws = get_default_waitset();
304            txcont = MKCONT(register_call_send, a);
305            err = topology->binding->register_send(topology->binding, ws, txcont);
306            if (err_is_fail(err)) {
307                topology->state = XNODE_STATE_FAILURE;
308            }
309        }
310    }
311}
312
313/// Receive handler table
314static struct xeon_phi_driver_rx_vtbl xps_rx_vtbl = {
315    .register__call = register_call_recv,
316    .register__response = register_response_recv,
317    .bootstrap_call = bootstrap_call_rx,
318    .bootstrap_response = bootstrap_response_rx
319};
320
321/*
322 * ---------------------------------------------------------------------------
323 * Service Setup
324 */
325static void svc_bind_cb(void *st,
326                        errval_t err,
327                        struct xeon_phi_driver_binding *b)
328{
329    struct xnode *node = st;
330    b->rx_vtbl = xps_rx_vtbl;
331    node->binding = b;
332    b->st = node;
333    node->state = XNODE_STATE_REGISTERING;
334}
335
336static errval_t svc_register(struct xnode *node)
337{
338    errval_t err;
339
340    XSERVICE_DEBUG("Initiate binding to Xeon Phi node %i @ iref=0x%x\n", node->id,
341                   node->iref);
342
343    err = xeon_phi_driver_bind(node->iref, svc_bind_cb, node, get_default_waitset(),
344    IDC_BIND_FLAGS_DEFAULT);
345    if (err_is_fail(err)) {
346        node->state = XNODE_STATE_FAILURE;
347        return err;
348    }
349
350    return SYS_ERR_OK;
351}
352
353static errval_t svc_connect_cb(void *st,
354                               struct xeon_phi_driver_binding *b)
355{
356    XSERVICE_DEBUG("Xeon Phi Node %u got a new connection to other node.\n",
357                   ((struct xeon_phi *)st)->id);
358
359    b->st = st;
360    b->rx_vtbl = xps_rx_vtbl;
361    return SYS_ERR_OK;
362}
363
364static void svc_export_cb(void *st,
365                          errval_t err,
366                          iref_t iref)
367{
368    if (err_is_fail(err)) {
369        svc_iref = 0x0;
370        return;
371    }
372
373    svc_iref = iref;
374
375    struct xeon_phi *phi = st;
376    phi->iref = iref;
377
378    is_exported = 0x1;
379}
380
381/**
382 * \brief initializes the service
383 *
384 * \param iref  returns the iref of the initialized service
385 *
386 * \return SYS_ERR_OK on success
387 */
388errval_t service_init(struct xeon_phi *phi)
389{
390    errval_t err;
391
392    for (uint32_t i = 0; i < XEON_PHI_NUM_MAX; ++i) {
393        phi->topology[i].local = phi;
394        phi->topology[i].id = i;
395        phi->topology[i].state = XNODE_STATE_NONE;
396    }
397
398    err = xeon_phi_driver_export(phi, svc_export_cb, svc_connect_cb,
399                                 get_default_waitset(),
400                                 IDC_EXPORT_FLAGS_DEFAULT);
401    if (err_is_fail(err)) {
402        return err;
403    }
404
405    while (!is_exported) {
406        messages_wait_and_handle_next();
407    }
408
409    if (svc_iref == 0x0) {
410        return -1;
411    }
412
413    return SYS_ERR_OK;
414
415}
416
417/**
418 * \brief registers the local service with the other Xeon Phi drivers
419 *        in the topology
420 *
421 * \param phi   pointer to the local card structure
422 * \param irefs the irefs of the other cards
423 * \param num   the number of irefs in the array
424 */
425errval_t service_register(struct xeon_phi *phi,
426                          iref_t *irefs,
427                          uint8_t num)
428{
429    errval_t err;
430
431    struct xnode *xnode;
432    XSERVICE_DEBUG("start binding to %u Xeon Phi nodes\n", num - 1);
433    for (uint32_t i = 0; i < num; ++i) {
434        xnode = &phi->topology[i];
435        xnode->local = phi;
436        if (i == phi->id) {
437            xnode->iref = phi->iref;
438            xnode->id = i;
439            xnode->state = XNODE_STATE_READY;
440            xnode->apt_base = phi->apt.pbase;
441            xnode->apt_size = phi->apt.length;
442            continue;
443        }
444
445        xnode->iref = irefs[i];
446        xnode->id = i;
447        xnode->state = XNODE_STATE_NONE;
448        svc_register(xnode);
449        while (xnode->state == XNODE_STATE_NONE) {
450            err = xeon_phi_event_poll(0x1);
451            if (err_is_fail(err)) {
452                return err;
453            }
454        }
455    }
456
457    XSERVICE_DEBUG("Start registering with %u Xeon Phi nodes\n", num - 1);
458
459    for (uint32_t i = 0; i < num; ++i) {
460        if (i == phi->id) {
461            continue;
462        }
463        xnode = &phi->topology[i];
464        register_call_send(xnode);
465        while (xnode->state == XNODE_STATE_REGISTERING) {
466            err = xeon_phi_event_poll(0x1);
467            if (err_is_fail(err)) {
468                return err;
469            }
470        }
471        if (xnode->state == XNODE_STATE_FAILURE) {
472            XSERVICE_DEBUG("Registering with Xeon Phi node %u failed.\n", xnode->id);
473        }
474    }
475
476    XSERVICE_DEBUG("Registering with other %i Xeon Phi done.\n", (uint32_t )num - 1);
477
478    return SYS_ERR_OK;
479}
480
481/**
482 * \brief starts the service request handling
483 */
484errval_t service_start(struct xeon_phi *phi)
485{
486    errval_t err;
487
488    while (1) {
489        err = xeon_phi_event_poll(0x1);
490        if (err_is_fail(err)) {
491            return err;
492        }
493    }
494
495    return SYS_ERR_OK;
496}
497