1/** 2 * \file 3 * \brief Driver for booting the Xeon Phi Coprocessor card on a Barrelfish Host 4 */ 5 6/* 7 * Copyright (c) 2014 ETH Zurich. 8 * All rights reserved. 9 * 10 * This file is distributed under the terms in the attached LICENSE file. 11 * If you do not find this file, copies can be found by writing to: 12 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group. 13 */ 14 15#include <stdio.h> 16#include <string.h> 17#include <barrelfish/barrelfish.h> 18#include <xeon_phi/xeon_phi.h> 19 20#include <if/xeon_phi_driver_defs.h> 21 22#include "xeon_phi_internal.h" 23#include "service.h" 24#include "interphi.h" 25#include "dma_service.h" 26#include "smpt.h" 27 28static uint32_t is_exported; 29 30static iref_t svc_iref; 31 32errval_t bootstrap_errors[XEON_PHI_NUM_MAX]; 33 34/* 35 * --------------------------------------------------------------------------- 36 * Intra Xeon Phi Driver Communication bootstrap 37 */ 38 39struct msg_open_st 40{ 41 struct xeon_phi_driver_binding *b; 42 uint64_t base; 43 uint8_t bits; 44 errval_t err; 45}; 46 47static void bootstrap_response_tx(void *a) 48{ 49 errval_t err; 50 51 struct msg_open_st *st = a; 52 53 struct event_closure txcont = MKCONT(free, a); 54 55 err = xeon_phi_driver_bootstrap_response__tx(st->b, txcont, st->err); 56 if (err_is_fail(err)) { 57 if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { 58 struct waitset *ws = get_default_waitset(); 59 txcont = MKCONT(bootstrap_response_tx, a); 60 err = st->b->register_send(st->b, ws, txcont); 61 if (err_is_fail(err)) { 62 XSERVICE_DEBUG("Could not send!"); 63 } 64 } 65 } 66} 67 68 69 70static void bootstrap_response_rx(struct xeon_phi_driver_binding *b, 71 errval_t msgerr) 72{ 73 struct xnode *node = b->st; 74 75 XSERVICE_DEBUG("Xeon Phi Node %u recv bootstrap_response_rx: %s\n", 76 node->local->id, err_getstring(msgerr)); 77 78 node->bootstrap_done = 0x1; 79 node->err = msgerr; 80} 81 82static void bootstrap_call_rx(struct xeon_phi_driver_binding *b, 83 uint64_t base, 84 uint8_t bits) 85{ 86 errval_t err; 87 88 struct xnode *node = b->st; 89 90 XSERVICE_DEBUG("Xeon Phi Node %u bootstrap_call_rx: [0x%016lx] from %u\n", 91 node->local->id, base, node->id); 92 93 lpaddr_t offset = ((node->apt_base >> 32) - ((node->apt_base >> 34) << 2)) << 32; 94 95 err = interphi_bootstrap(node->local, base, bits, offset, node->id, 0x1); 96 97 struct msg_open_st *st = malloc(sizeof(*st)); 98 if (st == NULL) { 99 USER_PANIC("could not allocate state"); 100 } 101 102 st->err = err; 103 st->b = node->binding; 104 105 bootstrap_response_tx(st); 106} 107 108static void bootstrap_call_tx(void *a) 109{ 110 errval_t err; 111 112 struct msg_open_st *st = a; 113 114 struct event_closure txcont = MKCONT(free, a); 115 116 err = xeon_phi_driver_bootstrap_call__tx(st->b, txcont, st->base, st->bits); 117 if (err_is_fail(err)) { 118 if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { 119 struct waitset *ws = get_default_waitset(); 120 txcont = MKCONT(bootstrap_call_tx, a); 121 err = st->b->register_send(st->b, ws, txcont); 122 if (err_is_fail(err)) { 123 XSERVICE_DEBUG("Could not send!"); 124 } 125 } 126 } 127} 128 129 130 131/** 132 * \brief registers an intra card communication frame 133 * 134 * \param phi the local xeon phi card 135 * \param xphi_id target xeon phi id 136 */ 137errval_t service_bootstrap(struct xeon_phi *phi, 138 uint8_t xphi_id, 139 struct capref frame) 140{ 141 assert(xphi_id < XEON_PHI_NUM_MAX); 142 143 if (phi->id == xphi_id) { 144 XSERVICE_DEBUG("The IDs were the same. Skipping.\n"); 145 return SYS_ERR_OK; 146 } 147 148 XSERVICE_DEBUG("sending bootstrap to node {xid:%u}.\n", xphi_id); 149 150 struct xnode *node = &phi->topology[xphi_id]; 151 152 assert(node->bootstrap_done == 0); 153 154 errval_t err; 155 struct frame_identity id; 156 err = invoke_frame_identify(frame, &id); 157 if (err_is_fail(err)) { 158 return err; 159 } 160 161 if (node->state != XNODE_STATE_READY) { 162 return -1; // TODO: error code 163 } 164 165 struct msg_open_st *st = malloc(sizeof(struct msg_open_st)); 166 if (st == NULL) { 167 return LIB_ERR_MALLOC_FAIL; 168 } 169 170 node->err = SYS_ERR_OK; 171 172 st->b = node->binding; 173 st->base = id.base; 174 assert((1UL << log2ceil(id.bytes)) == id.bytes); 175 st->bits = log2ceil(id.bytes); 176 177 bootstrap_call_tx(st); 178 179 XSERVICE_DEBUG("waiting for bootstrap done:%u.\n", xphi_id); 180 181 while(!node->bootstrap_done) { 182 xeon_phi_event_poll(0x1); 183 } 184 185 return node->err; 186} 187 188/* 189 * --------------------------------------------------------------------------- 190 * Intra Xeon Phi Driver Communication Regigistration 191 */ 192 193static void register_response_send(void *a) 194{ 195 errval_t err; 196 197 struct xnode *topology = a; 198 199 struct event_closure txcont = MKCONT(NULL, a); 200 201 if (topology->state == XNODE_STATE_READY) { 202 err = SYS_ERR_OK; 203 } else { 204 err = -1; // TODO> ERROR NUMBEr 205 } 206 207 struct xeon_phi *phi = topology->local; 208 209 err = xeon_phi_driver_register__response__tx(topology->binding, txcont, err, 210 phi->apt.pbase, phi->apt.length); 211 if (err_is_fail(err)) { 212 if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { 213 struct waitset *ws = get_default_waitset(); 214 txcont = MKCONT(register_response_send, a); 215 err = topology->binding->register_send(topology->binding, ws, txcont); 216 if (err_is_fail(err)) { 217 topology->state = XNODE_STATE_FAILURE; 218 } 219 } 220 } 221} 222 223/** 224 * 225 */ 226static void register_call_recv(struct xeon_phi_driver_binding *_binding, 227 uint8_t id, 228 uint64_t other_apt_base, 229 uint64_t other_apt_size) 230{ 231 struct xeon_phi *phi = _binding->st; 232 233 interphi_wait_for_client(phi); 234 235 assert(id < XEON_PHI_NUM_MAX); 236 phi->topology[id].binding = _binding; 237 phi->topology[id].state = XNODE_STATE_READY; 238 phi->topology[id].apt_base = other_apt_base; 239 phi->topology[id].apt_size = other_apt_size; 240 phi->connected++; 241 242 if (!smpt_set_coprocessor_address(phi, id, other_apt_base)) { 243 assert(!"Setting page table entry failed"); // TODO: proper error handling 244 }; 245 246 XSERVICE_DEBUG("Xeon Phi Node %u: New register call: id=0x%x @ [0x%016lx]\n", 247 phi->id, id, other_apt_base); 248 249 _binding->st = &phi->topology[id]; 250 251 register_response_send(&phi->topology[id]); 252} 253 254/** 255 * 256 */ 257static void register_response_recv(struct xeon_phi_driver_binding *_binding, 258 xeon_phi_driver_errval_t msgerr, 259 uint64_t other_apt_base, 260 uint64_t other_apt_size) 261{ 262 assert(((struct xnode * )(_binding->st))->binding == _binding); 263 264 struct xnode *topology = _binding->st; 265 266 if (err_is_fail(msgerr)) { 267 topology->state = XNODE_STATE_FAILURE; 268 XSERVICE_DEBUG("Xeon Phi node %u: Registering FAILED\n", 269 topology->local->id); 270 } else { 271 topology->local->connected++; 272 topology->state = XNODE_STATE_READY; 273 topology->apt_base = other_apt_base; 274 topology->apt_size = other_apt_size; 275 XSERVICE_DEBUG("Xeon Phi node %u: Registering response. Node %u @ 0x%016lx\n", 276 topology->local->id, topology->id, topology->apt_base); 277 278 smpt_set_coprocessor_address(topology->local, topology->id, other_apt_base); 279 } 280} 281 282static void register_call_sent_cb(void *a) 283{ 284 285} 286 287static void register_call_send(void *a) 288{ 289 errval_t err; 290 291 struct xnode *topology = a; 292 293 struct xeon_phi *phi = topology->local; 294 295 struct event_closure txcont = MKCONT(register_call_sent_cb, a); 296 297 topology->state = XNODE_STATE_REGISTERING; 298 299 err = xeon_phi_driver_register__call__tx(topology->binding, txcont, phi->id, 300 phi->apt.pbase, phi->apt.length); 301 if (err_is_fail(err)) { 302 if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { 303 struct waitset *ws = get_default_waitset(); 304 txcont = MKCONT(register_call_send, a); 305 err = topology->binding->register_send(topology->binding, ws, txcont); 306 if (err_is_fail(err)) { 307 topology->state = XNODE_STATE_FAILURE; 308 } 309 } 310 } 311} 312 313/// Receive handler table 314static struct xeon_phi_driver_rx_vtbl xps_rx_vtbl = { 315 .register__call = register_call_recv, 316 .register__response = register_response_recv, 317 .bootstrap_call = bootstrap_call_rx, 318 .bootstrap_response = bootstrap_response_rx 319}; 320 321/* 322 * --------------------------------------------------------------------------- 323 * Service Setup 324 */ 325static void svc_bind_cb(void *st, 326 errval_t err, 327 struct xeon_phi_driver_binding *b) 328{ 329 struct xnode *node = st; 330 b->rx_vtbl = xps_rx_vtbl; 331 node->binding = b; 332 b->st = node; 333 node->state = XNODE_STATE_REGISTERING; 334} 335 336static errval_t svc_register(struct xnode *node) 337{ 338 errval_t err; 339 340 XSERVICE_DEBUG("Initiate binding to Xeon Phi node %i @ iref=0x%x\n", node->id, 341 node->iref); 342 343 err = xeon_phi_driver_bind(node->iref, svc_bind_cb, node, get_default_waitset(), 344 IDC_BIND_FLAGS_DEFAULT); 345 if (err_is_fail(err)) { 346 node->state = XNODE_STATE_FAILURE; 347 return err; 348 } 349 350 return SYS_ERR_OK; 351} 352 353static errval_t svc_connect_cb(void *st, 354 struct xeon_phi_driver_binding *b) 355{ 356 XSERVICE_DEBUG("Xeon Phi Node %u got a new connection to other node.\n", 357 ((struct xeon_phi *)st)->id); 358 359 b->st = st; 360 b->rx_vtbl = xps_rx_vtbl; 361 return SYS_ERR_OK; 362} 363 364static void svc_export_cb(void *st, 365 errval_t err, 366 iref_t iref) 367{ 368 if (err_is_fail(err)) { 369 svc_iref = 0x0; 370 return; 371 } 372 373 svc_iref = iref; 374 375 struct xeon_phi *phi = st; 376 phi->iref = iref; 377 378 is_exported = 0x1; 379} 380 381/** 382 * \brief initializes the service 383 * 384 * \param iref returns the iref of the initialized service 385 * 386 * \return SYS_ERR_OK on success 387 */ 388errval_t service_init(struct xeon_phi *phi) 389{ 390 errval_t err; 391 392 for (uint32_t i = 0; i < XEON_PHI_NUM_MAX; ++i) { 393 phi->topology[i].local = phi; 394 phi->topology[i].id = i; 395 phi->topology[i].state = XNODE_STATE_NONE; 396 } 397 398 err = xeon_phi_driver_export(phi, svc_export_cb, svc_connect_cb, 399 get_default_waitset(), 400 IDC_EXPORT_FLAGS_DEFAULT); 401 if (err_is_fail(err)) { 402 return err; 403 } 404 405 while (!is_exported) { 406 messages_wait_and_handle_next(); 407 } 408 409 if (svc_iref == 0x0) { 410 return -1; 411 } 412 413 return SYS_ERR_OK; 414 415} 416 417/** 418 * \brief registers the local service with the other Xeon Phi drivers 419 * in the topology 420 * 421 * \param phi pointer to the local card structure 422 * \param irefs the irefs of the other cards 423 * \param num the number of irefs in the array 424 */ 425errval_t service_register(struct xeon_phi *phi, 426 iref_t *irefs, 427 uint8_t num) 428{ 429 errval_t err; 430 431 struct xnode *xnode; 432 XSERVICE_DEBUG("start binding to %u Xeon Phi nodes\n", num - 1); 433 for (uint32_t i = 0; i < num; ++i) { 434 xnode = &phi->topology[i]; 435 xnode->local = phi; 436 if (i == phi->id) { 437 xnode->iref = phi->iref; 438 xnode->id = i; 439 xnode->state = XNODE_STATE_READY; 440 xnode->apt_base = phi->apt.pbase; 441 xnode->apt_size = phi->apt.length; 442 continue; 443 } 444 445 xnode->iref = irefs[i]; 446 xnode->id = i; 447 xnode->state = XNODE_STATE_NONE; 448 svc_register(xnode); 449 while (xnode->state == XNODE_STATE_NONE) { 450 err = xeon_phi_event_poll(0x1); 451 if (err_is_fail(err)) { 452 return err; 453 } 454 } 455 } 456 457 XSERVICE_DEBUG("Start registering with %u Xeon Phi nodes\n", num - 1); 458 459 for (uint32_t i = 0; i < num; ++i) { 460 if (i == phi->id) { 461 continue; 462 } 463 xnode = &phi->topology[i]; 464 register_call_send(xnode); 465 while (xnode->state == XNODE_STATE_REGISTERING) { 466 err = xeon_phi_event_poll(0x1); 467 if (err_is_fail(err)) { 468 return err; 469 } 470 } 471 if (xnode->state == XNODE_STATE_FAILURE) { 472 XSERVICE_DEBUG("Registering with Xeon Phi node %u failed.\n", xnode->id); 473 } 474 } 475 476 XSERVICE_DEBUG("Registering with other %i Xeon Phi done.\n", (uint32_t )num - 1); 477 478 return SYS_ERR_OK; 479} 480 481/** 482 * \brief starts the service request handling 483 */ 484errval_t service_start(struct xeon_phi *phi) 485{ 486 errval_t err; 487 488 while (1) { 489 err = xeon_phi_event_poll(0x1); 490 if (err_is_fail(err)) { 491 return err; 492 } 493 } 494 495 return SYS_ERR_OK; 496} 497