1/** \file 2 * \brief Routing table set-up dispatcher 3 */ 4 5/* 6 * Copyright (c) 2010, ETH Zurich. 7 * All rights reserved. 8 * 9 * This file is distributed under the terms in the attached LICENSE file. 10 * If you do not find this file, copies can be found by writing to: 11 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group. 12 */ 13 14#include <ctype.h> 15#include <stdio.h> 16#include <string.h> 17#include <inttypes.h> 18#include <barrelfish/barrelfish.h> 19#include <barrelfish/nameservice_client.h> 20#include <if/monitor_defs.h> 21#include <skb/skb.h> 22 23// state of the rts 24static int num_cores = 0; 25static coreid_t **routing_table; 26 27// are we done yet? RTS exits if this flag is set to true 28static int done = false; 29 30/** 31 * The routing table set-up dispatcher (RTS) is responsible to create the routing 32 * table (for multi-hop messaging) and send them to the first monitor that 33 * is booted. It uses informations from the System Knowledge Base (SKB). 34 * The routing table is used to determine where to forward a multi-hop channel 35 * set-up request. 36 * 37 * We currently support three routing modes: 38 * 39 * 1) DIRECT: Always take the direct path 40 * 41 * 2) RING: Route over all cores in the system. 42 * Core i will route to core (i + 1) mod num_cores 43 * 44 * 3) FAT_TREE: Route direct between cores on the same CPU socket. On each socket, 45 * there is a "leader" (core with lowest ID on that socket). We 46 * route directly between all leader. Routes between sockets 47 * lead through the two leaders. 48 * 49 */ 50 51 52/* 53 * BIG FIXME: 54 * 55 * This code queries the SKB for the number of available cores (which is 56 * determined from the APIC IDs present in ACPI), and assumes that: 57 * 1. all available cores will be booted 58 * 2. core IDs will be in the range 0..num_cores-1 59 * 60 * This is only true on x86_64, and only true when spawnd performs the default 61 * allocation of core IDs (i.e. no command-line arguments to spawnd). 62 */ 63 64 65/* ------------------------------ ROUTING ------------------------------ */ 66 67static void route_ring(void) 68{ 69 errval_t err; 70 char *result, *str_err; 71 int32_t int_err; 72 73 // get the number of cores from SKB 74 err = skb_evaluate("available_nr_cores(Nr),write(Nr).", &result, 75 &str_err, &int_err); 76 if (err_is_fail(err)) { 77 USER_PANIC_ERR(err, "could not get number of cores from SKB\n"); 78 } else if (int_err != 0) { 79 USER_PANIC("could not get number of cores from SKB: %s\n", str_err); 80 } 81 82 num_cores = atoi(result); 83 printf("routing-setup: discovered number of cores: %d\n", num_cores); 84 free(str_err); 85 free(result); 86 87 // we have enough information for this case, construct routing table 88 routing_table = malloc(sizeof(coreid_t *) * num_cores); 89 for (coreid_t i = 0; i < num_cores; i++) { 90 routing_table[i] = malloc(sizeof(coreid_t) * num_cores); 91 for (coreid_t j = 0; j < num_cores; j++) { 92 routing_table[i][j] = (i + 1) % num_cores; 93 } 94 } 95} 96 97static void route_fat_tree(void) 98{ 99 errval_t err; 100 char *result, *str_err; 101 int32_t int_err; 102 103 // get the number of cores from SKB 104 err = skb_evaluate("available_nr_cores(Nr),write(Nr).", &result, 105 &str_err, &int_err); 106 if (err_is_fail(err)) { 107 USER_PANIC_ERR(err, "could not get number of cores from SKB\n"); 108 } else if (int_err != 0) { 109 USER_PANIC("could not get number of cores from SKB: %s\n", str_err); 110 } 111 112 num_cores = atoi(result); 113 printf("routing-setup: discovered number of cores: %d\n", num_cores); 114 free(str_err); 115 free(result); 116 117 // we need to know the number of cores per socket 118 // FIXME: this may not be the same for all sockets in the system! 119 int cores_per_socket = 0; 120 err = skb_evaluate("setof(C,cpu_affinity(C,_,A),Set),length(Set,L),write(L).", 121 &result, &str_err, &int_err); 122 if (err_is_fail(err)) { 123 USER_PANIC_ERR(err, "routing_setup: could not get number of cores per" 124 " socket from SKB\n"); 125 } else if (int_err != 0) { 126 // information about CPU affinity is not present in the SKB 127 // use 1 as default 128 cores_per_socket = 1; 129 printf("routing_setup: could not find information about CPU affinity in" 130 " SKB, using one core per socket\n"); 131 } else { 132 cores_per_socket = atoi(result); 133 printf("routing-setup: discovered number of cores per socket: %d\n", 134 cores_per_socket); 135 } 136 free(str_err); 137 free(result); 138 139 // construct routing table 140 routing_table = malloc(sizeof(coreid_t *) * num_cores); 141 for (coreid_t i = 0; i < num_cores; i++) { 142 143 routing_table[i] = malloc(sizeof(coreid_t) * num_cores); 144 145 if (i % cores_per_socket == 0) { 146 // this is a master node --> always route to the master of a socket... 147 for (coreid_t j = 0; j < num_cores; j++) { 148 routing_table[i][j] = j - (j % cores_per_socket); 149 } 150 151 // ... except in our subtree, where we create a full mesh 152 for (coreid_t j = i; j < i + cores_per_socket; j++) { 153 routing_table[i][j] = j; 154 } 155 156 } else { 157 // this node is not the master of a socket 158 159 // current master node 160 coreid_t master = i - (i % cores_per_socket); 161 162 // we always route to our master... 163 for (coreid_t j = 0; j < num_cores; j++) { 164 routing_table[i][j] = master; 165 } 166 167 // ... except in our subtree, where we create a full mesh 168 for (coreid_t j = master; j < master + cores_per_socket; j++) { 169 routing_table[i][j] = j; 170 } 171 } 172 } 173} 174 175 176/* ------------------------------ IDC ------------------------------ */ 177 178// send the routing table to the monitor 179static void send_table_to_monitor(void *arg) 180{ 181 182 errval_t err; 183 struct monitor_binding *b = get_monitor_binding(); 184 struct event_closure cont = MKCONT(send_table_to_monitor, NULL); 185 186 static enum {NEW_TABLE, SET_TABLE, DONE} phase = NEW_TABLE; 187 assert(phase == NEW_TABLE || phase == SET_TABLE || phase == DONE); 188 static coreid_t current_core = 0; 189 assert(current_core <= num_cores); 190 191 switch (phase) { 192 case NEW_TABLE: 193 // re-initialise the routing table 194 err = monitor_multihop_routing_table_new__tx(b, cont, num_cores - 1, 195 num_cores); 196 if (err_is_ok(err)) { 197 phase = SET_TABLE; 198 } 199 break; 200 201 case SET_TABLE: 202 // send a part of the routing table 203 err = monitor_multihop_routing_table_set__tx(b, cont, current_core, 204 routing_table[current_core], 205 num_cores); 206 if (err_is_ok(err)) { 207 if (++current_core == num_cores) { 208 phase = DONE; 209 } 210 } 211 break; 212 213 case DONE: 214 done = true; 215 err = SYS_ERR_OK; 216 break; 217 218 /* XXX: if -DNDEBUG is set, gcc fails to deduce that err is always set. 219 * The code below makes it obvious */ 220 default: 221 USER_PANIC("Unexpected value of phase:%u", phase); 222 } 223 224 if (err_is_fail(err)) { 225 if (err_no(err) == FLOUNDER_ERR_TX_BUSY) { 226 err = b->register_send(b, get_default_waitset(), cont); 227 assert(err_is_ok(err)); 228 } else { 229 USER_PANIC_ERR(err, "routing-setup: could not send routing table to" 230 " monitor\n"); 231 } 232 } 233} 234 235/* ------------------------------ MAIN ------------------------------ */ 236 237int main(int argc, char *argv[]) 238{ 239 // the used routing mode 240 enum { 241 MULTIHOP_ROUTE_DIRECT, MULTIHOP_ROUTE_RING, MULTIHOP_ROUTE_FAT_TREE 242 } routing_mode = MULTIHOP_ROUTE_DIRECT; // the default 243 244 errval_t err; 245 iref_t iref; 246 247 for (int i = 1; i < argc; i++) { 248 if (strcmp(argv[i], "direct") == 0) { 249 routing_mode = MULTIHOP_ROUTE_DIRECT; 250 } else if (strcmp(argv[i], "ring") == 0) { 251 routing_mode = MULTIHOP_ROUTE_RING; 252 } else if (strcmp(argv[i], "fat_tree") == 0) { 253 routing_mode = MULTIHOP_ROUTE_FAT_TREE; 254 } else if (strcmp(argv[i], "boot") == 0) { 255 // ignored 256 } else { 257 printf("%s: Unknown argument: %s\n", argv[0], argv[i]); 258 } 259 } 260 261 if (routing_mode == MULTIHOP_ROUTE_DIRECT) { 262 // don't do anything, as direct routing is anyway 263 // the default 264 goto out; 265 } 266 267 // Wait for pci to finish ACPI enumeration. 268 // This uses the nameserver as a lock server. 269 err = nameservice_blocking_lookup("pci_discovery_done", &iref); 270 if (err_is_fail(err)) { 271 USER_PANIC_ERR(err, "nameservice_blocking_lookup failed"); 272 } 273 274 // connect to the system knowledge base (SKB) 275 err = skb_client_connect(); 276 if (err_is_fail(err)) { 277 USER_PANIC_ERR(err, "skb_client_connect failed"); 278 } 279 280 // compute routing table 281 switch (routing_mode) { 282 case MULTIHOP_ROUTE_RING: 283 route_ring(); 284 break; 285 286 case MULTIHOP_ROUTE_FAT_TREE: 287 route_fat_tree(); 288 break; 289 290 default: 291 USER_PANIC("routing_setup: unknown routing mode\n"); 292 } 293 294 // send the routing table to the monitor 295 assert(routing_table != NULL); 296 send_table_to_monitor(NULL); 297 298 // handle messages 299 struct waitset *ws = get_default_waitset(); 300 while (!done) { 301 err = event_dispatch(ws); 302 if (err_is_fail(err)) { 303 USER_PANIC_ERR(err, "in event_dispatch"); 304 } 305 } 306 307 out: 308 // let everybody know that we are done by registering rts_done 309 // with the nameservice 310 err = nameservice_register("rts_done", 0); 311 assert(err_is_ok(err)); 312 313 return EXIT_SUCCESS; 314} 315