1/** \file
2 *  \brief Routing table set-up dispatcher
3 */
4
5/*
6 * Copyright (c) 2010, ETH Zurich.
7 * All rights reserved.
8 *
9 * This file is distributed under the terms in the attached LICENSE file.
10 * If you do not find this file, copies can be found by writing to:
11 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
12 */
13
14#include <ctype.h>
15#include <stdio.h>
16#include <string.h>
17#include <inttypes.h>
18#include <barrelfish/barrelfish.h>
19#include <barrelfish/nameservice_client.h>
20#include <if/monitor_defs.h>
21#include <skb/skb.h>
22
23// state of the rts
24static int num_cores = 0;
25static coreid_t **routing_table;
26
27// are we done yet? RTS exits if this flag is set to true
28static int done = false;
29
30/**
 * The routing table set-up dispatcher (RTS) is responsible for creating the
 * routing table (for multi-hop messaging) and sending it to the first monitor
 * that is booted. It uses information from the System Knowledge Base (SKB).
 * The routing table is used to determine where to forward a multi-hop channel
 * set-up request.
36 *
37 * We currently support three routing modes:
38 *
39 * 1) DIRECT:   Always take the direct path
40 *
41 * 2) RING:     Route over all cores in the system.
42 *              Core i will route to core (i + 1) mod num_cores
43 *
 * 3) FAT_TREE: Route directly between cores on the same CPU socket. On each
 *              socket, there is a "leader" (the core with the lowest ID on
 *              that socket). We route directly between all leaders. Routes
 *              between sockets lead through the leaders of the two sockets.
48 *
49 */
50
51
52/*
53 * BIG FIXME:
54 *
55 * This code queries the SKB for the number of available cores (which is
56 * determined from the APIC IDs present in ACPI), and assumes that:
57 * 1. all available cores will be booted
58 * 2. core IDs will be in the range 0..num_cores-1
59 *
60 * This is only true on x86_64, and only true when spawnd performs the default
61 * allocation of core IDs (i.e. no command-line arguments to spawnd).
62 */
63
64
65/* ------------------------------ ROUTING ------------------------------ */
66
67static void route_ring(void)
68{
69    errval_t err;
70    char *result, *str_err;
71    int32_t int_err;
72
73    // get the number of cores from SKB
74    err = skb_evaluate("available_nr_cores(Nr),write(Nr).", &result,
75                       &str_err, &int_err);
76    if (err_is_fail(err)) {
77        USER_PANIC_ERR(err, "could not get number of cores from SKB\n");
78    } else if (int_err != 0) {
79        USER_PANIC("could not get number of cores from SKB: %s\n", str_err);
80    }
81
82    num_cores = atoi(result);
83    printf("routing-setup: discovered number of cores: %d\n", num_cores);
84    free(str_err);
85    free(result);
86
87    // we have enough information for this case, construct routing table
88    routing_table = malloc(sizeof(coreid_t *) * num_cores);
89    for (coreid_t i = 0; i < num_cores; i++) {
90        routing_table[i] = malloc(sizeof(coreid_t) * num_cores);
91        for (coreid_t j = 0; j < num_cores; j++) {
92            routing_table[i][j] = (i + 1) % num_cores;
93        }
94    }
95}
96
97static void route_fat_tree(void)
98{
99    errval_t err;
100    char *result, *str_err;
101    int32_t int_err;
102
103    // get the number of cores from SKB
104    err = skb_evaluate("available_nr_cores(Nr),write(Nr).", &result,
105                       &str_err, &int_err);
106    if (err_is_fail(err)) {
107        USER_PANIC_ERR(err, "could not get number of cores from SKB\n");
108    } else if (int_err != 0) {
109        USER_PANIC("could not get number of cores from SKB: %s\n", str_err);
110    }
111
112    num_cores = atoi(result);
113    printf("routing-setup: discovered number of cores: %d\n", num_cores);
114    free(str_err);
115    free(result);
116
117    // we need to know the number of cores per socket
118    // FIXME: this may not be the same for all sockets in the system!
119    int cores_per_socket = 0;
120    err = skb_evaluate("setof(C,cpu_affinity(C,_,A),Set),length(Set,L),write(L).",
121                       &result, &str_err, &int_err);
122    if (err_is_fail(err)) {
123        USER_PANIC_ERR(err, "routing_setup: could not get number of cores per"
124                       " socket from SKB\n");
125    } else if (int_err != 0) {
126        // information about CPU affinity is not present in the SKB
127        // use 1 as default
128        cores_per_socket = 1;
129        printf("routing_setup: could not find information about CPU affinity in"
130               " SKB, using one core per socket\n");
131    } else {
132        cores_per_socket = atoi(result);
133        printf("routing-setup: discovered number of cores per socket: %d\n",
134               cores_per_socket);
135    }
136    free(str_err);
137    free(result);
138
139    // construct routing table
140    routing_table = malloc(sizeof(coreid_t *) * num_cores);
141    for (coreid_t i = 0; i < num_cores; i++) {
142
143        routing_table[i] = malloc(sizeof(coreid_t) * num_cores);
144
145        if (i % cores_per_socket == 0) {
146            // this is a master node --> always route to the master of a socket...
147            for (coreid_t j = 0; j < num_cores; j++) {
148                routing_table[i][j] = j - (j % cores_per_socket);
149            }
150
151            // ... except in our subtree, where we create a full mesh
152            for (coreid_t j = i; j < i + cores_per_socket; j++) {
153                routing_table[i][j] = j;
154            }
155
156        } else {
157            // this node is not the master of a socket
158
159            // current master node
160            coreid_t master = i - (i % cores_per_socket);
161
162            // we always route to our master...
163            for (coreid_t j = 0; j < num_cores; j++) {
164                routing_table[i][j] = master;
165            }
166
167            // ... except in our subtree, where we create a full mesh
168            for (coreid_t j = master; j < master + cores_per_socket; j++) {
169                routing_table[i][j] = j;
170            }
171        }
172    }
173}
174
175
176/* ------------------------------ IDC ------------------------------ */
177
178// send the routing table to the monitor
179static void send_table_to_monitor(void *arg)
180{
181
182    errval_t err;
183    struct monitor_binding *b = get_monitor_binding();
184    struct event_closure cont = MKCONT(send_table_to_monitor, NULL);
185
186    static enum {NEW_TABLE, SET_TABLE, DONE} phase = NEW_TABLE;
187    assert(phase == NEW_TABLE || phase == SET_TABLE || phase == DONE);
188    static coreid_t current_core = 0;
189    assert(current_core <= num_cores);
190
191    switch (phase) {
192    case NEW_TABLE:
193        // re-initialise the routing table
194        err = monitor_multihop_routing_table_new__tx(b, cont, num_cores - 1,
195                                                     num_cores);
196        if (err_is_ok(err)) {
197            phase = SET_TABLE;
198        }
199        break;
200
201    case SET_TABLE:
202        // send a part of the routing table
203        err = monitor_multihop_routing_table_set__tx(b, cont, current_core,
204                                                     routing_table[current_core],
205                                                     num_cores);
206        if (err_is_ok(err)) {
207            if (++current_core == num_cores) {
208                phase = DONE;
209            }
210        }
211        break;
212
213    case DONE:
214        done = true;
215        err = SYS_ERR_OK;
216        break;
217
218    /* XXX: if -DNDEBUG is set, gcc fails to deduce that err is always set.
219     *      The code below makes it obvious */
220    default:
221        USER_PANIC("Unexpected value of phase:%u", phase);
222    }
223
224    if (err_is_fail(err)) {
225        if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
226            err = b->register_send(b, get_default_waitset(), cont);
227            assert(err_is_ok(err));
228        } else {
229            USER_PANIC_ERR(err, "routing-setup: could not send routing table to"
230                           " monitor\n");
231        }
232    }
233}
234
235/* ------------------------------ MAIN ------------------------------ */
236
237int main(int argc, char *argv[])
238{
239    // the used routing mode
240    enum {
241        MULTIHOP_ROUTE_DIRECT, MULTIHOP_ROUTE_RING, MULTIHOP_ROUTE_FAT_TREE
242    } routing_mode = MULTIHOP_ROUTE_DIRECT; // the default
243
244    errval_t err;
245    iref_t iref;
246
247    for (int i = 1; i < argc; i++) {
248        if (strcmp(argv[i], "direct") == 0) {
249            routing_mode = MULTIHOP_ROUTE_DIRECT;
250        } else if (strcmp(argv[i], "ring") == 0) {
251            routing_mode = MULTIHOP_ROUTE_RING;
252        } else if (strcmp(argv[i], "fat_tree") == 0) {
253            routing_mode = MULTIHOP_ROUTE_FAT_TREE;
254        } else if (strcmp(argv[i], "boot") == 0) {
255            // ignored
256        } else {
257            printf("%s: Unknown argument: %s\n", argv[0], argv[i]);
258        }
259    }
260
261    if (routing_mode == MULTIHOP_ROUTE_DIRECT) {
262        // don't do anything, as direct routing is anyway
263        // the default
264        goto out;
265    }
266
267    // Wait for pci to finish ACPI enumeration.
268    // This uses the nameserver as a lock server.
269    err = nameservice_blocking_lookup("pci_discovery_done", &iref);
270    if (err_is_fail(err)) {
271        USER_PANIC_ERR(err, "nameservice_blocking_lookup failed");
272    }
273
274    // connect to the system knowledge base (SKB)
275    err = skb_client_connect();
276    if (err_is_fail(err)) {
277        USER_PANIC_ERR(err, "skb_client_connect failed");
278    }
279
280    // compute routing table
281    switch (routing_mode) {
282    case MULTIHOP_ROUTE_RING:
283        route_ring();
284        break;
285
286    case MULTIHOP_ROUTE_FAT_TREE:
287        route_fat_tree();
288        break;
289
290    default:
291        USER_PANIC("routing_setup: unknown routing mode\n");
292    }
293
294    // send the routing table to the monitor
295    assert(routing_table != NULL);
296    send_table_to_monitor(NULL);
297
298    // handle messages
299    struct waitset *ws = get_default_waitset();
300    while (!done) {
301        err = event_dispatch(ws);
302        if (err_is_fail(err)) {
303            USER_PANIC_ERR(err, "in event_dispatch");
304        }
305    }
306
307 out:
308    // let everybody know that we are done by registering rts_done
309    // with the nameservice
310    err = nameservice_register("rts_done", 0);
311    assert(err_is_ok(err));
312
313    return EXIT_SUCCESS;
314}
315