1/*
2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2007,2009 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2009 HNR Consulting. All rights reserved.
6 * Copyright (c) 2009 Battelle Memorial Institue. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses.  You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 *     Redistribution and use in source and binary forms, with or
15 *     without modification, are permitted provided that the following
16 *     conditions are met:
17 *
18 *      - Redistributions of source code must retain the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer.
21 *
22 *      - Redistributions in binary form must reproduce the above
23 *        copyright notice, this list of conditions and the following
24 *        disclaimer in the documentation and/or other materials
25 *        provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 */
37
38/*
39 * Abstract:
40 *      Implementation of Up Down Algorithm using ranking & Min Hop
41 *      Calculation functions
42 */
43
44#if HAVE_CONFIG_H
45#  include <config.h>
46#endif				/* HAVE_CONFIG_H */
47
48#include <stdlib.h>
49#include <ctype.h>
50#include <complib/cl_debug.h>
51#include <complib/cl_qmap.h>
52#include <opensm/osm_file_ids.h>
53#define FILE_ID OSM_FILE_UCAST_DNUP_C
54#include <opensm/osm_switch.h>
55#include <opensm/osm_opensm.h>
56#include <opensm/osm_ucast_mgr.h>
57
58/* //////////////////////////// */
59/*  Local types                 */
60/* //////////////////////////// */
61
62/* direction */
63typedef enum dnup_switch_dir {
64	UP = 0,
65	DOWN,
66	EQUAL
67} dnup_switch_dir_t;
68
69/* dnup structure */
70typedef struct dnup {
71	osm_opensm_t *p_osm;
72} dnup_t;
73
74struct dnup_node {
75	cl_list_item_t list;
76	osm_switch_t *sw;
77	dnup_switch_dir_t dir;
78	unsigned rank;
79	unsigned visited;
80};
81
82/* This function returns direction based on rank and guid info of current &
83   remote ports */
84static dnup_switch_dir_t dnup_get_dir(unsigned cur_rank, unsigned rem_rank)
85{
86	/* HACK: comes to solve root nodes connection, in a classic subnet root nodes do not connect
87	   directly, but in case they are we assign to root node an UP direction to allow DNUP to discover
88	   the subnet correctly (and not from the point of view of the last root node).
89	 */
90	if (!cur_rank && !rem_rank)
91		return EQUAL;
92
93	if (cur_rank < rem_rank)
94		return DOWN;
95	else if (cur_rank > rem_rank)
96		return UP;
97	else
98		return EQUAL;
99}
100
101/**********************************************************************
102 * This function does the bfs of min hop table calculation by guid index
103 * as a starting point.
104 **********************************************************************/
105static int dnup_bfs_by_node(IN osm_log_t * p_log, IN osm_subn_t * p_subn,
106			    IN osm_switch_t * p_sw, IN uint8_t prune_weight,
107			    OUT uint8_t * max_hops)
108{
109	uint8_t pn, pn_rem;
110	cl_qlist_t list;
111	uint16_t lid;
112	struct dnup_node *u;
113	dnup_switch_dir_t next_dir, current_dir;
114
115	OSM_LOG_ENTER(p_log);
116
117	lid = osm_node_get_base_lid(p_sw->p_node, 0);
118	lid = cl_ntoh16(lid);
119	osm_switch_set_hops(p_sw, lid, 0, 0);
120
121	OSM_LOG(p_log, OSM_LOG_DEBUG,
122		"Starting from switch - port GUID 0x%" PRIx64 " lid %u\n",
123		cl_ntoh64(p_sw->p_node->node_info.port_guid), lid);
124
125	u = p_sw->priv;
126	u->dir = DOWN;
127
128	/* Update list with the new element */
129	cl_qlist_init(&list);
130	cl_qlist_insert_tail(&list, &u->list);
131
132	/* BFS the list till no next element */
133	while (!cl_is_qlist_empty(&list)) {
134		u = (struct dnup_node *)cl_qlist_remove_head(&list);
135		u->visited = 0;	/* cleanup */
136		current_dir = u->dir;
137		/* Go over all ports of the switch and find unvisited remote nodes */
138		for (pn = 1; pn < u->sw->num_ports; pn++) {
139			osm_node_t *p_remote_node;
140			struct dnup_node *rem_u;
141			uint8_t current_min_hop, remote_min_hop,
142			    set_hop_return_value;
143			osm_switch_t *p_remote_sw;
144
145			p_remote_node =
146			    osm_node_get_remote_node(u->sw->p_node, pn,
147						     &pn_rem);
148			/* If no remote node OR remote node is not a SWITCH
149			   continue to next pn */
150			if (!p_remote_node || !p_remote_node->sw)
151				continue;
152			/* Fetch remote guid only after validation of remote node */
153			p_remote_sw = p_remote_node->sw;
154			rem_u = p_remote_sw->priv;
155			/* Decide which direction to mark it (UP/DOWN) */
156			next_dir = dnup_get_dir(u->rank, rem_u->rank);
157
158			/* Set MinHop value for the current lid */
159			current_min_hop = osm_switch_get_least_hops(u->sw, lid);
160			/* Check hop count if better insert into list && update
161			   the remote node Min Hop Table */
162			remote_min_hop =
163			    osm_switch_get_hop_count(p_remote_sw, lid, pn_rem);
164
165			/* Check if this is a legal step : the only illegal step is going
166			   from UP to DOWN */
167			if ((current_dir == UP) && (next_dir == DOWN)) {
168				OSM_LOG(p_log, OSM_LOG_DEBUG,
169					"Avoiding move from 0x%016" PRIx64
170					" to 0x%016" PRIx64 "\n",
171					cl_ntoh64(osm_node_get_node_guid(u->sw->p_node)),
172					cl_ntoh64(osm_node_get_node_guid(p_remote_node)));
173				/* Illegal step. If prune_weight is set, allow it with an
174				 * additional weight
175				 */
176				if(prune_weight) {
177					current_min_hop+=prune_weight;
178					if(current_min_hop >= 64) {
179						OSM_LOG(p_log, OSM_LOG_ERROR,
180							"ERR AE02: Too many hops on subnet,"
181							" can't relax illegal Dn/Up transition.");
182						osm_switch_set_hops(p_remote_sw, lid,
183								    pn_rem, OSM_NO_PATH);
184					}
185				} else {
186					continue;
187				}
188			}
189			if (current_min_hop + 1 < remote_min_hop) {
190				set_hop_return_value =
191				    osm_switch_set_hops(p_remote_sw, lid,
192							pn_rem,
193							current_min_hop + 1);
194				if(max_hops && current_min_hop + 1 > *max_hops) {
195					*max_hops = current_min_hop + 1;
196				}
197				if (set_hop_return_value) {
198					OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AE01: "
199						"Invalid value returned from set min hop is: %d\n",
200						set_hop_return_value);
201				}
202				/* Check if remote port has already been visited */
203				if (!rem_u->visited) {
204					/* Insert dnup_switch item into the list */
205					rem_u->dir = next_dir;
206					rem_u->visited = 1;
207					cl_qlist_insert_tail(&list,
208							     &rem_u->list);
209				}
210			}
211		}
212	}
213
214	OSM_LOG_EXIT(p_log);
215	return 0;
216}
217
218/* NOTE : PLS check if we need to decide that the first */
219/*        rank is a SWITCH for BFS purpose */
220static int dnup_subn_rank(IN dnup_t * p_dnup)
221{
222	osm_switch_t *p_sw;
223	osm_physp_t *p_physp, *p_remote_physp;
224	cl_qlist_t list;
225	cl_map_item_t *item;
226	struct dnup_node *u, *remote_u;
227	uint8_t num_ports, port_num;
228	osm_log_t *p_log = &p_dnup->p_osm->log;
229	unsigned max_rank = 0;
230
231	OSM_LOG_ENTER(p_log);
232	cl_qlist_init(&list);
233
234	/* add all node level switches to the list */
235	for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
236	     item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
237	     item = cl_qmap_next(item)) {
238		p_sw = (osm_switch_t *)item;
239		u = p_sw->priv;
240		if (u->rank == 0)
241			cl_qlist_insert_tail(&list, &u->list);
242	}
243
244	/* BFS the list till it's empty */
245	while (!cl_is_qlist_empty(&list)) {
246		u = (struct dnup_node *)cl_qlist_remove_head(&list);
247		/* Go over all remote nodes and rank them (if not already visited) */
248		p_sw = u->sw;
249		num_ports = p_sw->num_ports;
250		OSM_LOG(p_log, OSM_LOG_DEBUG,
251			"Handling switch GUID 0x%" PRIx64 "\n",
252			cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)));
253		for (port_num = 1; port_num < num_ports; port_num++) {
254			ib_net64_t port_guid;
255
256			/* Current port fetched in order to get remote side */
257			p_physp =
258			    osm_node_get_physp_ptr(p_sw->p_node, port_num);
259
260			if (!p_physp)
261				continue;
262
263			p_remote_physp = p_physp->p_remote_physp;
264
265			/*
266			   make sure that all the following occur on p_remote_physp:
267			   1. The port isn't NULL
268			   2. It is a switch
269			 */
270			if (p_remote_physp && p_remote_physp->p_node->sw) {
271				remote_u = p_remote_physp->p_node->sw->priv;
272				port_guid = p_remote_physp->port_guid;
273
274				if (remote_u->rank > u->rank + 1) {
275					remote_u->rank = u->rank + 1;
276					max_rank = remote_u->rank;
277					cl_qlist_insert_tail(&list,
278							     &remote_u->list);
279					OSM_LOG(p_log, OSM_LOG_DEBUG,
280						"Rank of port GUID 0x%" PRIx64
281						" = %u\n", cl_ntoh64(port_guid),
282						remote_u->rank);
283				}
284			}
285		}
286	}
287
288	/* Print Summary of ranking */
289	OSM_LOG(p_log, OSM_LOG_VERBOSE,
290		"Subnet ranking completed. Max Node Rank = %d\n", max_rank);
291	OSM_LOG_EXIT(p_log);
292	return 0;
293}
294
295static int dnup_set_min_hop_table(IN dnup_t * p_dnup)
296{
297	osm_subn_t *p_subn = &p_dnup->p_osm->subn;
298	osm_log_t *p_log = &p_dnup->p_osm->log;
299	osm_switch_t *p_sw;
300	struct dnup_node *u;
301	cl_map_item_t *item;
302	uint8_t max_hops = 0;
303
304	OSM_LOG_ENTER(p_log);
305
306	/* Go over all the switches in the subnet - for each init their Min Hop
307	   Table */
308	OSM_LOG(p_log, OSM_LOG_VERBOSE,
309		"Init Min Hop Table of all switches [\n");
310
311	for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
312	     item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
313	     item = cl_qmap_next(item)) {
314		p_sw = (osm_switch_t *)item;
315		/* Clear Min Hop Table */
316		osm_switch_clear_hops(p_sw);
317	}
318
319	OSM_LOG(p_log, OSM_LOG_VERBOSE,
320		"Init Min Hop Table of all switches ]\n");
321
322	/* Now do the BFS for each port  in the subnet */
323	OSM_LOG(p_log, OSM_LOG_VERBOSE,
324		"BFS through all port guids in the subnet [\n");
325
326	for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
327	     item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
328	     item = cl_qmap_next(item)) {
329		p_sw = (osm_switch_t *)item;
330		dnup_bfs_by_node(p_log, p_subn, p_sw, 0, &max_hops);
331	}
332	if(p_subn->opt.connect_roots) {
333		/*This is probably not necessary, by I am more comfortable
334		 * clearing any possible side effects from the previous
335		 * dnup routing pass
336		 */
337		for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
338		     item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
339		     item = cl_qmap_next(item)) {
340			p_sw = (osm_switch_t *)item;
341			osm_switch_clear_hops(p_sw);
342			u = (struct dnup_node *) p_sw->priv;
343			u->visited = 0;
344		}
345		for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
346		     item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
347		     item = cl_qmap_next(item)) {
348			p_sw = (osm_switch_t *)item;
349			dnup_bfs_by_node(p_log, p_subn, p_sw, max_hops + 1, NULL);
350		}
351	}
352
353	OSM_LOG(p_log, OSM_LOG_VERBOSE,
354		"BFS through all port guids in the subnet ]\n");
355	/* Cleanup */
356	OSM_LOG_EXIT(p_log);
357	return 0;
358}
359
360static int dnup_build_lid_matrices(IN dnup_t * p_dnup)
361{
362	int status;
363
364	OSM_LOG_ENTER(&p_dnup->p_osm->log);
365
366	OSM_LOG(&p_dnup->p_osm->log, OSM_LOG_VERBOSE,
367		"Ranking all port guids in the list\n");
368	/* Check if it's not a switched subnet */
369	if (cl_is_qmap_empty(&p_dnup->p_osm->subn.sw_guid_tbl)) {
370		OSM_LOG(&p_dnup->p_osm->log, OSM_LOG_ERROR, "ERR AEOB: "
371			"This is not a switched subnet, cannot perform DNUP algorithm\n");
372		status = -1;
373		goto _exit;
374	}
375
376	/* Rank the subnet switches */
377	dnup_subn_rank(p_dnup);
378
379	/* After multiple ranking need to set Min Hop Table by DnUp algorithm  */
380	OSM_LOG(&p_dnup->p_osm->log, OSM_LOG_VERBOSE,
381		"Setting all switches' Min Hop Table\n");
382	status = dnup_set_min_hop_table(p_dnup);
383
384_exit:
385	OSM_LOG_EXIT(&p_dnup->p_osm->log);
386	return status;
387}
388
389static struct dnup_node *create_dnup_node(osm_switch_t * sw)
390{
391	struct dnup_node *u;
392
393	u = malloc(sizeof(*u));
394	if (!u)
395		return NULL;
396	memset(u, 0, sizeof(*u));
397	u->sw = sw;
398	u->rank = 0xffffffff;
399	return u;
400}
401
402static void delete_dnup_node(struct dnup_node *u)
403{
404	u->sw->priv = NULL;
405	free(u);
406}
407
408/* DNUP callback function */
409static int dnup_lid_matrices(void *ctx)
410{
411	dnup_t *p_dnup = ctx;
412	cl_map_item_t *item;
413	osm_switch_t *p_sw;
414	int ret = 0;
415	int num_leafs = 0;
416	uint8_t pn, pn_rem;
417
418	OSM_LOG_ENTER(&p_dnup->p_osm->log);
419
420	for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
421	     item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
422	     item = cl_qmap_next(item)) {
423		p_sw = (osm_switch_t *)item;
424		p_sw->priv = create_dnup_node(p_sw);
425		if (!p_sw->priv) {
426			OSM_LOG(&(p_dnup->p_osm->log), OSM_LOG_ERROR, "ERR AE0C: "
427				"cannot create dnup node\n");
428			OSM_LOG_EXIT(&p_dnup->p_osm->log);
429			return -1;
430		}
431	}
432
433
434	/* First setup node level nodes */
435	for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
436	     item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
437	     item = cl_qmap_next(item)) {
438		p_sw = (osm_switch_t *)item;
439
440		for (pn = 0; pn < p_sw->num_ports; pn++) {
441			osm_node_t *p_remote_node;
442			p_remote_node = osm_node_get_remote_node(p_sw->p_node, pn, &pn_rem);
443			if(p_remote_node && !p_remote_node->sw) {
444				struct dnup_node *u = p_sw->priv;
445				u->rank = 0;
446				OSM_LOG(&(p_dnup->p_osm->log),
447					OSM_LOG_VERBOSE, "(%s) rank 0 leaf switch\n",
448					p_sw->p_node->print_desc);
449				num_leafs++;
450				break;
451			}
452		}
453	}
454
455	if(num_leafs == 0) {
456		OSM_LOG(&(p_dnup->p_osm->log),
457			OSM_LOG_ERROR, "ERR AE0D: No leaf switches found, DnUp routing failed\n");
458		OSM_LOG_EXIT(&p_dnup->p_osm->log);
459		return -1;
460	}
461
462	ret = dnup_build_lid_matrices(p_dnup);
463
464	for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
465	     item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
466	     item = cl_qmap_next(item)) {
467		p_sw = (osm_switch_t *) item;
468		delete_dnup_node(p_sw->priv);
469	}
470
471	OSM_LOG_EXIT(&p_dnup->p_osm->log);
472	return ret;
473}
474
475static void dnup_delete(void *context)
476{
477	free(context);
478}
479
480int osm_ucast_dnup_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
481{
482	dnup_t *dnup;
483
484	OSM_LOG_ENTER(&osm->log);
485
486	dnup = malloc(sizeof(dnup_t));
487	if (!dnup)
488		return -1;
489	memset(dnup, 0, sizeof(dnup_t));
490
491	dnup->p_osm = osm;
492
493	r->context = dnup;
494	r->destroy = dnup_delete;
495	r->build_lid_matrices = dnup_lid_matrices;
496
497	OSM_LOG_EXIT(&osm->log);
498	return 0;
499}
500