1219820Sjeff/*
2219820Sjeff * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3219820Sjeff * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
4219820Sjeff * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5219820Sjeff *
6219820Sjeff * This software is available to you under a choice of one of two
7219820Sjeff * licenses.  You may choose to be licensed under the terms of the GNU
8219820Sjeff * General Public License (GPL) Version 2, available from the file
9219820Sjeff * COPYING in the main directory of this source tree, or the
10219820Sjeff * OpenIB.org BSD license below:
11219820Sjeff *
12219820Sjeff *     Redistribution and use in source and binary forms, with or
13219820Sjeff *     without modification, are permitted provided that the following
14219820Sjeff *     conditions are met:
15219820Sjeff *
16219820Sjeff *      - Redistributions of source code must retain the above
17219820Sjeff *        copyright notice, this list of conditions and the following
18219820Sjeff *        disclaimer.
19219820Sjeff *
20219820Sjeff *      - Redistributions in binary form must reproduce the above
21219820Sjeff *        copyright notice, this list of conditions and the following
22219820Sjeff *        disclaimer in the documentation and/or other materials
23219820Sjeff *        provided with the distribution.
24219820Sjeff *
25219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32219820Sjeff * SOFTWARE.
33219820Sjeff *
34219820Sjeff */
35219820Sjeff
36219820Sjeff/*
37219820Sjeff * Abstract:
38219820Sjeff *    Implementation of osm_link_mgr_t.
39219820Sjeff * This file implements the Link Manager object.
40219820Sjeff */
41219820Sjeff
42219820Sjeff#if HAVE_CONFIG_H
43219820Sjeff#  include <config.h>
44219820Sjeff#endif				/* HAVE_CONFIG_H */
45219820Sjeff
46219820Sjeff#include <string.h>
47219820Sjeff#include <iba/ib_types.h>
48219820Sjeff#include <complib/cl_debug.h>
49219820Sjeff#include <opensm/osm_sm.h>
50219820Sjeff#include <opensm/osm_node.h>
51219820Sjeff#include <opensm/osm_switch.h>
52219820Sjeff#include <opensm/osm_helper.h>
53219820Sjeff#include <opensm/osm_msgdef.h>
54219820Sjeff
55219820Sjeff/**********************************************************************
56219820Sjeff **********************************************************************/
57219820Sjeffstatic boolean_t
58219820Sjeff__osm_link_mgr_set_physp_pi(osm_sm_t * sm,
59219820Sjeff			    IN osm_physp_t * const p_physp,
60219820Sjeff			    IN uint8_t const port_state)
61219820Sjeff{
62219820Sjeff	uint8_t payload[IB_SMP_DATA_SIZE];
63219820Sjeff	ib_port_info_t *const p_pi = (ib_port_info_t *) payload;
64219820Sjeff	const ib_port_info_t *p_old_pi;
65219820Sjeff	osm_madw_context_t context;
66219820Sjeff	osm_node_t *p_node;
67219820Sjeff	ib_api_status_t status;
68219820Sjeff	uint8_t port_num;
69219820Sjeff	uint8_t mtu;
70219820Sjeff	uint8_t op_vls;
71219820Sjeff	boolean_t esp0 = FALSE;
72219820Sjeff	boolean_t send_set = FALSE;
73219820Sjeff	osm_physp_t *p_remote_physp;
74219820Sjeff
75219820Sjeff	OSM_LOG_ENTER(sm->p_log);
76219820Sjeff
77219820Sjeff	p_node = osm_physp_get_node_ptr(p_physp);
78219820Sjeff
79219820Sjeff	port_num = osm_physp_get_port_num(p_physp);
80219820Sjeff
81219820Sjeff	if (port_num == 0) {
82219820Sjeff		/*
83219820Sjeff		   CAs don't have a port 0, and for switch port 0,
84219820Sjeff		   we need to check if this is enhanced or base port 0.
85219820Sjeff		   For base port 0 the following parameters are not valid (p822, table 145).
86219820Sjeff		 */
87219820Sjeff		if (!p_node->sw) {
88219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4201: "
89219820Sjeff				"Cannot find switch by guid: 0x%" PRIx64 "\n",
90219820Sjeff				cl_ntoh64(p_node->node_info.node_guid));
91219820Sjeff			goto Exit;
92219820Sjeff		}
93219820Sjeff
94219820Sjeff		if (ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info)
95219820Sjeff		    == FALSE) {
96219820Sjeff			/* This means the switch doesn't support enhanced port 0.
97219820Sjeff			   Can skip it. */
98219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
99219820Sjeff				"Skipping port 0, GUID 0x%016" PRIx64 "\n",
100219820Sjeff				cl_ntoh64(osm_physp_get_port_guid(p_physp)));
101219820Sjeff			goto Exit;
102219820Sjeff		}
103219820Sjeff		esp0 = TRUE;
104219820Sjeff	}
105219820Sjeff
106219820Sjeff	/*
107219820Sjeff	   PAST THIS POINT WE ARE HANDLING EITHER A NON PORT 0 OR ENHANCED PORT 0
108219820Sjeff	 */
109219820Sjeff
110219820Sjeff	p_old_pi = &p_physp->port_info;
111219820Sjeff
112219820Sjeff	memset(payload, 0, IB_SMP_DATA_SIZE);
113219820Sjeff	memcpy(payload, p_old_pi, sizeof(ib_port_info_t));
114219820Sjeff
115219820Sjeff	/*
116219820Sjeff	   Should never write back a value that is bigger then 3 in
117219820Sjeff	   the PortPhysicalState field - so can not simply copy!
118219820Sjeff
119219820Sjeff	   Actually we want to write there:
120219820Sjeff	   port physical state - no change,
121219820Sjeff	   link down default state = polling
122219820Sjeff	   port state - as requested.
123219820Sjeff	 */
124219820Sjeff	p_pi->state_info2 = 0x02;
125219820Sjeff	ib_port_info_set_port_state(p_pi, port_state);
126219820Sjeff
127219820Sjeff	if (ib_port_info_get_link_down_def_state(p_pi) !=
128219820Sjeff	    ib_port_info_get_link_down_def_state(p_old_pi))
129219820Sjeff		send_set = TRUE;
130219820Sjeff
131219820Sjeff	/* didn't get PortInfo before */
132219820Sjeff	if (!ib_port_info_get_port_state(p_old_pi))
133219820Sjeff		send_set = TRUE;
134219820Sjeff
135219820Sjeff	/* we only change port fields if we do not change state */
136219820Sjeff	if (port_state == IB_LINK_NO_CHANGE) {
137219820Sjeff		/* The following fields are relevant only for CA port, router, or Enh. SP0 */
138219820Sjeff		if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH ||
139219820Sjeff		    port_num == 0) {
140219820Sjeff			p_pi->m_key = sm->p_subn->opt.m_key;
141219820Sjeff			if (memcmp(&p_pi->m_key, &p_old_pi->m_key,
142219820Sjeff				   sizeof(p_pi->m_key)))
143219820Sjeff				send_set = TRUE;
144219820Sjeff
145219820Sjeff			p_pi->subnet_prefix = sm->p_subn->opt.subnet_prefix;
146219820Sjeff			if (memcmp(&p_pi->subnet_prefix,
147219820Sjeff				   &p_old_pi->subnet_prefix,
148219820Sjeff				   sizeof(p_pi->subnet_prefix)))
149219820Sjeff				send_set = TRUE;
150219820Sjeff
151219820Sjeff			p_pi->base_lid = osm_physp_get_base_lid(p_physp);
152219820Sjeff			if (memcmp(&p_pi->base_lid, &p_old_pi->base_lid,
153219820Sjeff				   sizeof(p_pi->base_lid)))
154219820Sjeff				send_set = TRUE;
155219820Sjeff
156219820Sjeff			/* we are initializing the ports with our local sm_base_lid */
157219820Sjeff			p_pi->master_sm_base_lid = sm->p_subn->sm_base_lid;
158219820Sjeff			if (memcmp(&p_pi->master_sm_base_lid,
159219820Sjeff				   &p_old_pi->master_sm_base_lid,
160219820Sjeff				   sizeof(p_pi->master_sm_base_lid)))
161219820Sjeff				send_set = TRUE;
162219820Sjeff
163219820Sjeff			p_pi->m_key_lease_period =
164219820Sjeff			    sm->p_subn->opt.m_key_lease_period;
165219820Sjeff			if (memcmp(&p_pi->m_key_lease_period,
166219820Sjeff				   &p_old_pi->m_key_lease_period,
167219820Sjeff				   sizeof(p_pi->m_key_lease_period)))
168219820Sjeff				send_set = TRUE;
169219820Sjeff
170219820Sjeff			if (esp0 == FALSE)
171219820Sjeff				p_pi->mkey_lmc = sm->p_subn->opt.lmc;
172219820Sjeff			else {
173219820Sjeff				if (sm->p_subn->opt.lmc_esp0)
174219820Sjeff					p_pi->mkey_lmc = sm->p_subn->opt.lmc;
175219820Sjeff				else
176219820Sjeff					p_pi->mkey_lmc = 0;
177219820Sjeff			}
178219820Sjeff			if (memcmp(&p_pi->mkey_lmc, &p_old_pi->mkey_lmc,
179219820Sjeff				   sizeof(p_pi->mkey_lmc)))
180219820Sjeff				send_set = TRUE;
181219820Sjeff
182219820Sjeff			ib_port_info_set_timeout(p_pi,
183219820Sjeff						 sm->p_subn->opt.
184219820Sjeff						 subnet_timeout);
185219820Sjeff			if (ib_port_info_get_timeout(p_pi) !=
186219820Sjeff			    ib_port_info_get_timeout(p_old_pi))
187219820Sjeff				send_set = TRUE;
188219820Sjeff		}
189219820Sjeff
190219820Sjeff		/*
191219820Sjeff		   Several timeout mechanisms:
192219820Sjeff		 */
193219820Sjeff		p_remote_physp = osm_physp_get_remote(p_physp);
194219820Sjeff		if (port_num != 0 && p_remote_physp) {
195219820Sjeff			if (osm_node_get_type(osm_physp_get_node_ptr(p_physp))
196219820Sjeff			    == IB_NODE_TYPE_ROUTER) {
197219820Sjeff				ib_port_info_set_hoq_lifetime(p_pi,
198219820Sjeff							      sm->p_subn->
199219820Sjeff							      opt.
200219820Sjeff							      leaf_head_of_queue_lifetime);
201219820Sjeff			} else
202219820Sjeff			    if (osm_node_get_type
203219820Sjeff				(osm_physp_get_node_ptr(p_physp)) ==
204219820Sjeff				IB_NODE_TYPE_SWITCH) {
205219820Sjeff				/* Is remote end CA or router (a leaf port) ? */
206219820Sjeff				if (osm_node_get_type
207219820Sjeff				    (osm_physp_get_node_ptr(p_remote_physp)) !=
208219820Sjeff				    IB_NODE_TYPE_SWITCH) {
209219820Sjeff					ib_port_info_set_hoq_lifetime(p_pi,
210219820Sjeff								      sm->
211219820Sjeff								      p_subn->
212219820Sjeff								      opt.
213219820Sjeff								      leaf_head_of_queue_lifetime);
214219820Sjeff					ib_port_info_set_vl_stall_count(p_pi,
215219820Sjeff									sm->
216219820Sjeff									p_subn->
217219820Sjeff									opt.
218219820Sjeff									leaf_vl_stall_count);
219219820Sjeff				} else {
220219820Sjeff					ib_port_info_set_hoq_lifetime(p_pi,
221219820Sjeff								      sm->
222219820Sjeff								      p_subn->
223219820Sjeff								      opt.
224219820Sjeff								      head_of_queue_lifetime);
225219820Sjeff					ib_port_info_set_vl_stall_count(p_pi,
226219820Sjeff									sm->
227219820Sjeff									p_subn->
228219820Sjeff									opt.
229219820Sjeff									vl_stall_count);
230219820Sjeff				}
231219820Sjeff			}
232219820Sjeff			if (ib_port_info_get_hoq_lifetime(p_pi) !=
233219820Sjeff			    ib_port_info_get_hoq_lifetime(p_old_pi) ||
234219820Sjeff			    ib_port_info_get_vl_stall_count(p_pi) !=
235219820Sjeff			    ib_port_info_get_vl_stall_count(p_old_pi))
236219820Sjeff				send_set = TRUE;
237219820Sjeff		}
238219820Sjeff
239219820Sjeff		ib_port_info_set_phy_and_overrun_err_thd(p_pi,
240219820Sjeff							 sm->p_subn->opt.
241219820Sjeff							 local_phy_errors_threshold,
242219820Sjeff							 sm->p_subn->opt.
243219820Sjeff							 overrun_errors_threshold);
244219820Sjeff		if (memcmp(&p_pi->error_threshold, &p_old_pi->error_threshold,
245219820Sjeff			   sizeof(p_pi->error_threshold)))
246219820Sjeff			send_set = TRUE;
247219820Sjeff
248219820Sjeff		/*
249219820Sjeff		   Set the easy common parameters for all port types,
250219820Sjeff		   then determine the neighbor MTU.
251219820Sjeff		 */
252219820Sjeff		p_pi->link_width_enabled = p_old_pi->link_width_supported;
253219820Sjeff		if (memcmp(&p_pi->link_width_enabled,
254219820Sjeff			   &p_old_pi->link_width_enabled,
255219820Sjeff			   sizeof(p_pi->link_width_enabled)))
256219820Sjeff			send_set = TRUE;
257219820Sjeff
258219820Sjeff		if (sm->p_subn->opt.force_link_speed &&
259219820Sjeff		    (sm->p_subn->opt.force_link_speed != 15 ||
260219820Sjeff		     ib_port_info_get_link_speed_enabled(p_pi) !=
261219820Sjeff		     ib_port_info_get_link_speed_sup(p_pi))) {
262219820Sjeff			ib_port_info_set_link_speed_enabled(p_pi,
263219820Sjeff							    sm->p_subn->opt.
264219820Sjeff							    force_link_speed);
265219820Sjeff			if (memcmp(&p_pi->link_speed, &p_old_pi->link_speed,
266219820Sjeff				   sizeof(p_pi->link_speed)))
267219820Sjeff				send_set = TRUE;
268219820Sjeff		}
269219820Sjeff
270219820Sjeff		/* calc new op_vls and mtu */
271219820Sjeff		op_vls =
272219820Sjeff		    osm_physp_calc_link_op_vls(sm->p_log, sm->p_subn, p_physp);
273219820Sjeff		mtu = osm_physp_calc_link_mtu(sm->p_log, p_physp);
274219820Sjeff
275219820Sjeff		ib_port_info_set_neighbor_mtu(p_pi, mtu);
276219820Sjeff		if (ib_port_info_get_neighbor_mtu(p_pi) !=
277219820Sjeff		    ib_port_info_get_neighbor_mtu(p_old_pi))
278219820Sjeff			send_set = TRUE;
279219820Sjeff
280219820Sjeff		ib_port_info_set_op_vls(p_pi, op_vls);
281219820Sjeff		if (ib_port_info_get_op_vls(p_pi) !=
282219820Sjeff		    ib_port_info_get_op_vls(p_old_pi))
283219820Sjeff			send_set = TRUE;
284219820Sjeff
285219820Sjeff		/* provide the vl_high_limit from the qos mgr */
286219820Sjeff		if (sm->p_subn->opt.qos &&
287219820Sjeff		    p_physp->vl_high_limit != p_old_pi->vl_high_limit) {
288219820Sjeff			send_set = TRUE;
289219820Sjeff			p_pi->vl_high_limit = p_physp->vl_high_limit;
290219820Sjeff		}
291219820Sjeff	}
292219820Sjeff
293219820Sjeff	if (port_state != IB_LINK_NO_CHANGE &&
294219820Sjeff	    port_state != ib_port_info_get_port_state(p_old_pi)) {
295219820Sjeff		send_set = TRUE;
296219820Sjeff		if (port_state == IB_LINK_ACTIVE)
297219820Sjeff			context.pi_context.active_transition = TRUE;
298219820Sjeff		else
299219820Sjeff			context.pi_context.active_transition = FALSE;
300219820Sjeff	}
301219820Sjeff
302219820Sjeff	context.pi_context.node_guid = osm_node_get_node_guid(p_node);
303219820Sjeff	context.pi_context.port_guid = osm_physp_get_port_guid(p_physp);
304219820Sjeff	context.pi_context.set_method = TRUE;
305219820Sjeff	context.pi_context.light_sweep = FALSE;
306219820Sjeff
307219820Sjeff	/* We need to send the PortInfoSet request with the new sm_lid
308219820Sjeff	   in the following cases:
309219820Sjeff	   1. There is a change in the values (send_set == TRUE)
310219820Sjeff	   2. This is a switch external port (so it wasn't handled yet by
311219820Sjeff	   osm_lid_mgr) and first_time_master_sweep flag on the subnet is TRUE,
312219820Sjeff	   which means the SM just became master, and it then needs to send at
313219820Sjeff	   PortInfoSet to every port.
314219820Sjeff	 */
315219820Sjeff	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num
316219820Sjeff	    && sm->p_subn->first_time_master_sweep == TRUE)
317219820Sjeff		send_set = TRUE;
318219820Sjeff
319219820Sjeff	if (send_set)
320219820Sjeff		status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp),
321219820Sjeff				     payload, sizeof(payload),
322219820Sjeff				     IB_MAD_ATTR_PORT_INFO,
323219820Sjeff				     cl_hton32(port_num),
324219820Sjeff				     CL_DISP_MSGID_NONE, &context);
325219820Sjeff
326219820SjeffExit:
327219820Sjeff	OSM_LOG_EXIT(sm->p_log);
328219820Sjeff	return send_set;
329219820Sjeff}
330219820Sjeff
331219820Sjeff/**********************************************************************
332219820Sjeff **********************************************************************/
333219820Sjeffstatic osm_signal_t
334219820Sjeff__osm_link_mgr_process_node(osm_sm_t * sm,
335219820Sjeff			    IN osm_node_t * const p_node,
336219820Sjeff			    IN const uint8_t link_state)
337219820Sjeff{
338219820Sjeff	uint32_t i;
339219820Sjeff	uint32_t num_physp;
340219820Sjeff	osm_physp_t *p_physp;
341219820Sjeff	uint8_t current_state;
342219820Sjeff	osm_signal_t signal = OSM_SIGNAL_DONE;
343219820Sjeff
344219820Sjeff	OSM_LOG_ENTER(sm->p_log);
345219820Sjeff
346219820Sjeff	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
347219820Sjeff		"Node 0x%" PRIx64 " going to %s\n",
348219820Sjeff		cl_ntoh64(osm_node_get_node_guid(p_node)),
349219820Sjeff		ib_get_port_state_str(link_state));
350219820Sjeff
351219820Sjeff	/*
352219820Sjeff	   Set the PortInfo for every Physical Port associated
353219820Sjeff	   with this Port.  Start iterating with port 1, since the linkstate
354219820Sjeff	   is not applicable to the management port on switches.
355219820Sjeff	 */
356219820Sjeff	num_physp = osm_node_get_num_physp(p_node);
357219820Sjeff	for (i = 0; i < num_physp; i++) {
358219820Sjeff		/*
359219820Sjeff		   Don't bother doing anything if this Physical Port is not valid.
360219820Sjeff		   or if the state of the port is already better then the
361219820Sjeff		   specified state.
362219820Sjeff		 */
363219820Sjeff		p_physp = osm_node_get_physp_ptr(p_node, (uint8_t) i);
364219820Sjeff		if (!p_physp)
365219820Sjeff			continue;
366219820Sjeff
367219820Sjeff		current_state = osm_physp_get_port_state(p_physp);
368219820Sjeff		if (current_state == IB_LINK_DOWN)
369219820Sjeff			continue;
370219820Sjeff
371219820Sjeff		/*
372219820Sjeff		   Normally we only send state update if state is lower
373219820Sjeff		   then required state. However, we need to send update if
374219820Sjeff		   no state change required.
375219820Sjeff		 */
376219820Sjeff		if (link_state != IB_LINK_NO_CHANGE &&
377219820Sjeff		    link_state <= current_state)
378219820Sjeff			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
379219820Sjeff				"Physical port %u already %s. Skipping\n",
380219820Sjeff				p_physp->port_num,
381219820Sjeff				ib_get_port_state_str(current_state));
382219820Sjeff		else if (__osm_link_mgr_set_physp_pi(sm, p_physp, link_state))
383219820Sjeff			signal = OSM_SIGNAL_DONE_PENDING;
384219820Sjeff	}
385219820Sjeff
386219820Sjeff	OSM_LOG_EXIT(sm->p_log);
387219820Sjeff	return (signal);
388219820Sjeff}
389219820Sjeff
390219820Sjeff/**********************************************************************
391219820Sjeff **********************************************************************/
392219820Sjeffosm_signal_t osm_link_mgr_process(osm_sm_t * sm, IN const uint8_t link_state)
393219820Sjeff{
394219820Sjeff	cl_qmap_t *p_node_guid_tbl;
395219820Sjeff	osm_node_t *p_node;
396219820Sjeff	osm_signal_t signal = OSM_SIGNAL_DONE;
397219820Sjeff
398219820Sjeff	OSM_LOG_ENTER(sm->p_log);
399219820Sjeff
400219820Sjeff	p_node_guid_tbl = &sm->p_subn->node_guid_tbl;
401219820Sjeff
402219820Sjeff	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);
403219820Sjeff
404219820Sjeff	for (p_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl);
405219820Sjeff	     p_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl);
406219820Sjeff	     p_node = (osm_node_t *) cl_qmap_next(&p_node->map_item)) {
407219820Sjeff		if (__osm_link_mgr_process_node(sm, p_node, link_state) ==
408219820Sjeff		    OSM_SIGNAL_DONE_PENDING)
409219820Sjeff			signal = OSM_SIGNAL_DONE_PENDING;
410219820Sjeff	}
411219820Sjeff
412219820Sjeff	CL_PLOCK_RELEASE(sm->p_lock);
413219820Sjeff
414219820Sjeff	OSM_LOG_EXIT(sm->p_log);
415219820Sjeff	return (signal);
416219820Sjeff}
417