1219820Sjeff/*
2230135Suqs * Copyright (c) 2006 Intel Corporation.  All rights reserved.
3219820Sjeff *
4219820Sjeff * This software is available to you under a choice of one of two
5219820Sjeff * licenses.  You may choose to be licensed under the terms of the GNU
6219820Sjeff * General Public License (GPL) Version 2, available from the file
7219820Sjeff * COPYING in the main directory of this source tree, or the
8219820Sjeff * OpenIB.org BSD license below:
9219820Sjeff *
10219820Sjeff *     Redistribution and use in source and binary forms, with or
11219820Sjeff *     without modification, are permitted provided that the following
12219820Sjeff *     conditions are met:
13219820Sjeff *
14219820Sjeff *      - Redistributions of source code must retain the above
15219820Sjeff *        copyright notice, this list of conditions and the following
16219820Sjeff *        disclaimer.
17219820Sjeff *
18219820Sjeff *      - Redistributions in binary form must reproduce the above
19219820Sjeff *        copyright notice, this list of conditions and the following
20219820Sjeff *        disclaimer in the documentation and/or other materials
21219820Sjeff *        provided with the distribution.
22219820Sjeff *
23219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30219820Sjeff * SOFTWARE.
31219820Sjeff */
32219820Sjeff
33219820Sjeff#include <linux/dma-mapping.h>
34219820Sjeff#include <linux/err.h>
35219820Sjeff#include <linux/interrupt.h>
36219820Sjeff#include <linux/rbtree.h>
37219820Sjeff#include <linux/mutex.h>
38219820Sjeff#include <linux/spinlock.h>
39219820Sjeff#include <linux/pci.h>
40219820Sjeff#include <linux/miscdevice.h>
41219820Sjeff#include <linux/random.h>
42219820Sjeff
43219820Sjeff#include <rdma/ib_cache.h>
44219820Sjeff#include <rdma/ib_sa.h>
45219820Sjeff#include "sa.h"
46219820Sjeff
/* Module metadata. */
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand subnet administration caching");
MODULE_LICENSE("Dual BSD/GPL");
50219820Sjeff
/* Cache limits: path count cap and retry timer bounds (milliseconds). */
enum {
	SA_DB_MAX_PATHS_PER_DEST = 127,		/* 0x7F */
	SA_DB_MIN_RETRY_TIMER	 = 4 * 1000,	/*   4 sec */
	SA_DB_MAX_RETRY_TIMER	 = 256 * 1000	/* 256 sec */
};
56219820Sjeff
/*
 * paths_per_dest: number of paths requested per destination GID.  The
 * setter clamps the value to SA_DB_MAX_PATHS_PER_DEST and refreshes the
 * database; a value of 0 (the default) disables the cache.
 */
static int set_paths_per_dest(const char *val, struct kernel_param *kp);
static unsigned long paths_per_dest = 0;
module_param_call(paths_per_dest, set_paths_per_dest, param_get_ulong,
		  &paths_per_dest, 0644);
MODULE_PARM_DESC(paths_per_dest, "Maximum number of paths to retrieve "
				 "to each destination (DGID).  Set to 0 "
				 "to disable cache.");

/*
 * subscribe_inform_info: when non-zero, process_updates() registers for
 * GID in/out-of-service notices; when zero it unsubscribes the port.
 */
static int set_subscribe_inform_info(const char *val, struct kernel_param *kp);
static char subscribe_inform_info = 1;
module_param_call(subscribe_inform_info, set_subscribe_inform_info,
		  param_get_bool, &subscribe_inform_info, 0644);
MODULE_PARM_DESC(subscribe_inform_info,
		 "Subscribe for SA InformInfo/Notice events.");

/* refresh: write-only trigger; any write refreshes every known port. */
static int do_refresh(const char *val, struct kernel_param *kp);
module_param_call(refresh, do_refresh, NULL, NULL, 0200);

/* Timeout, in milliseconds, applied to each SA query MAD (see get_sa_msg). */
static unsigned long retry_timer = SA_DB_MIN_RETRY_TIMER;
76219820Sjeff
/* Selectable policies for the lookup_method module parameter. */
enum sa_db_lookup_method {
	SA_DB_LOOKUP_LEAST_USED = 0,
	SA_DB_LOOKUP_RANDOM     = 1
};
81219820Sjeff
/*
 * lookup_method: one of enum sa_db_lookup_method.  The setter rejects any
 * other value; the getter prints both choices and marks the active one.
 */
static int set_lookup_method(const char *val, struct kernel_param *kp);
static int get_lookup_method(char *buf, struct kernel_param *kp);
static unsigned long lookup_method;
module_param_call(lookup_method, set_lookup_method, get_lookup_method,
		  &lookup_method, 0644);
MODULE_PARM_DESC(lookup_method, "Method used to return path records when "
				"multiple paths exist to a given destination.");

static void sa_db_add_dev(struct ib_device *device);
static void sa_db_remove_dev(struct ib_device *device);

/* IB client registration: device add/remove callbacks for this module. */
static struct ib_client sa_db_client = {
	.name   = "local_sa",
	.add    = sa_db_add_dev,
	.remove = sa_db_remove_dev
};

static LIST_HEAD(dev_list);		/* all sa_db_device instances */
static DEFINE_MUTEX(lock);		/* held around refresh and notice handling */
static rwlock_t rwlock;			/* guards the per-port path rb-trees */
static struct workqueue_struct *sa_wq;	/* runs port_work_handler() */
static struct ib_sa_client sa_client;	/* used for InformInfo registration */
104219820Sjeff
/* Per-port state of the cache update machinery. */
enum sa_db_state {
	SA_DB_IDLE    = 0,
	SA_DB_REFRESH = 1,
	SA_DB_DESTROY = 2
};
110219820Sjeff
/* Per-port cache state: SM addressing, subscriptions and cached paths. */
struct sa_db_port {
	struct sa_db_device	*dev;		/* owning device */
	struct ib_mad_agent	*agent;		/* MAD agent used for SA queries */
	/* Limit number of outstanding MADs to SA to reduce SA flooding */
	struct ib_mad_send_buf	*msg;		/* most recently built query */
	u16			sm_lid;		/* from ib_query_port() */
	u8			sm_sl;		/* from ib_query_port() */
	struct ib_inform_info	*in_info;	/* GID-in-service registration */
	struct ib_inform_info	*out_info;	/* GID-out-of-service registration */
	struct rb_root		paths;		/* ib_sa_attr_list nodes keyed by GID */
	struct list_head	update_list;	/* pending struct update_info */
	unsigned long		update_id;	/* bumped on every full update */
	enum sa_db_state	state;
	struct work_struct	work;		/* queued on sa_wq to refresh the port */
	union ib_gid		gid;		/* used as SGID in path queries */
	int			port_num;
};
128219820Sjeff
129219820Sjeffstruct sa_db_device {
130219820Sjeff	struct list_head	list;
131219820Sjeff	struct ib_device	*device;
132219820Sjeff	struct ib_event_handler event_handler;
133219820Sjeff	int			start_port;
134219820Sjeff	int			port_count;
135219820Sjeff	struct sa_db_port	port[0];
136219820Sjeff};
137219820Sjeff
/* Singly linked list link embedded at the start of each cached record. */
struct ib_sa_iterator {
	struct ib_sa_iterator	*next;
};

/*
 * Cursor over the records of one attribute list.  'flags' holds the IRQ
 * state saved by read_lock_irqsave() in ib_create_path_iter() and restored
 * by ib_free_path_iter().
 */
struct ib_sa_attr_iter {
	struct ib_sa_iterator	*iter;
	unsigned long		flags;
};
146219820Sjeff
147219820Sjeffstruct ib_sa_attr_list {
148219820Sjeff	struct ib_sa_iterator	iter;
149219820Sjeff	struct ib_sa_iterator	*tail;
150219820Sjeff	int			update_id;
151219820Sjeff	union ib_gid		gid;
152219820Sjeff	struct rb_node		node;
153219820Sjeff};
154219820Sjeff
/* One cached path record, chained on an ib_sa_attr_list through 'iter'. */
struct ib_path_rec_info {
	struct ib_sa_iterator	iter; /* keep first */
	struct ib_sa_path_rec	rec;
	unsigned long		lookups;
};
160219820Sjeff
161219820Sjeffstruct ib_sa_mad_iter {
162219820Sjeff	struct ib_mad_recv_wc	*recv_wc;
163219820Sjeff	struct ib_mad_recv_buf	*recv_buf;
164219820Sjeff	int			attr_size;
165219820Sjeff	int			attr_offset;
166219820Sjeff	int			data_offset;
167219820Sjeff	int			data_left;
168219820Sjeff	void			*attr;
169219820Sjeff	u8			attr_data[0];
170219820Sjeff};
171219820Sjeff
/* Kind of work described by a queued struct update_info. */
enum sa_update_type {
	SA_UPDATE_FULL   = 0,
	SA_UPDATE_ADD    = 1,
	SA_UPDATE_REMOVE = 2
};
177219820Sjeff
/*
 * One queued update for a port.  'gid' is only filled in when the caller
 * of add_update() supplies one.
 */
struct update_info {
	struct list_head	list;	/* link on sa_db_port.update_list */
	union ib_gid		gid;
	enum sa_update_type	type;
};

/* Deferred path lookup: work item plus the callback and its arguments. */
struct sa_path_request {
	struct work_struct	work;
	struct ib_sa_client	*client;
	void			(*callback)(int, struct ib_sa_path_rec *, void *);
	void			*context;
	struct ib_sa_path_rec	path_rec;
};

/* Defined below; needed by add_update(). */
static void process_updates(struct sa_db_port *port);
193219820Sjeff
194219820Sjeffstatic void free_attr_list(struct ib_sa_attr_list *attr_list)
195219820Sjeff{
196219820Sjeff	struct ib_sa_iterator *cur;
197219820Sjeff
198219820Sjeff	for (cur = attr_list->iter.next; cur; cur = attr_list->iter.next) {
199219820Sjeff		attr_list->iter.next = cur->next;
200219820Sjeff		kfree(cur);
201219820Sjeff	}
202219820Sjeff	attr_list->tail = &attr_list->iter;
203219820Sjeff}
204219820Sjeff
205219820Sjeffstatic void remove_attr(struct rb_root *root, struct ib_sa_attr_list *attr_list)
206219820Sjeff{
207219820Sjeff	rb_erase(&attr_list->node, root);
208219820Sjeff	free_attr_list(attr_list);
209219820Sjeff	kfree(attr_list);
210219820Sjeff}
211219820Sjeff
212219820Sjeffstatic void remove_all_attrs(struct rb_root *root)
213219820Sjeff{
214219820Sjeff	struct rb_node *node, *next_node;
215219820Sjeff	struct ib_sa_attr_list *attr_list;
216219820Sjeff
217219820Sjeff	write_lock_irq(&rwlock);
218219820Sjeff	for (node = rb_first(root); node; node = next_node) {
219219820Sjeff		next_node = rb_next(node);
220219820Sjeff		attr_list = rb_entry(node, struct ib_sa_attr_list, node);
221219820Sjeff		remove_attr(root, attr_list);
222219820Sjeff	}
223219820Sjeff	write_unlock_irq(&rwlock);
224219820Sjeff}
225219820Sjeff
226219820Sjeffstatic void remove_old_attrs(struct rb_root *root, unsigned long update_id)
227219820Sjeff{
228219820Sjeff	struct rb_node *node, *next_node;
229219820Sjeff	struct ib_sa_attr_list *attr_list;
230219820Sjeff
231219820Sjeff	write_lock_irq(&rwlock);
232219820Sjeff	for (node = rb_first(root); node; node = next_node) {
233219820Sjeff		next_node = rb_next(node);
234219820Sjeff		attr_list = rb_entry(node, struct ib_sa_attr_list, node);
235219820Sjeff		if (attr_list->update_id != update_id)
236219820Sjeff			remove_attr(root, attr_list);
237219820Sjeff	}
238219820Sjeff	write_unlock_irq(&rwlock);
239219820Sjeff}
240219820Sjeff
241219820Sjeffstatic struct ib_sa_attr_list *insert_attr_list(struct rb_root *root,
242219820Sjeff						struct ib_sa_attr_list *attr_list)
243219820Sjeff{
244219820Sjeff	struct rb_node **link = &root->rb_node;
245219820Sjeff	struct rb_node *parent = NULL;
246219820Sjeff	struct ib_sa_attr_list *cur_attr_list;
247219820Sjeff	int cmp;
248219820Sjeff
249219820Sjeff	while (*link) {
250219820Sjeff		parent = *link;
251219820Sjeff		cur_attr_list = rb_entry(parent, struct ib_sa_attr_list, node);
252219820Sjeff		cmp = memcmp(&cur_attr_list->gid, &attr_list->gid,
253219820Sjeff			     sizeof attr_list->gid);
254219820Sjeff		if (cmp < 0)
255219820Sjeff			link = &(*link)->rb_left;
256219820Sjeff		else if (cmp > 0)
257219820Sjeff			link = &(*link)->rb_right;
258219820Sjeff		else
259219820Sjeff			return cur_attr_list;
260219820Sjeff	}
261219820Sjeff	rb_link_node(&attr_list->node, parent, link);
262219820Sjeff	rb_insert_color(&attr_list->node, root);
263219820Sjeff	return NULL;
264219820Sjeff}
265219820Sjeff
266219820Sjeffstatic struct ib_sa_attr_list *find_attr_list(struct rb_root *root, u8 *gid)
267219820Sjeff{
268219820Sjeff	struct rb_node *node = root->rb_node;
269219820Sjeff	struct ib_sa_attr_list *attr_list;
270219820Sjeff	int cmp;
271219820Sjeff
272219820Sjeff	while (node) {
273219820Sjeff		attr_list = rb_entry(node, struct ib_sa_attr_list, node);
274219820Sjeff		cmp = memcmp(&attr_list->gid, gid, sizeof attr_list->gid);
275219820Sjeff		if (cmp < 0)
276219820Sjeff			node = node->rb_left;
277219820Sjeff		else if (cmp > 0)
278219820Sjeff			node = node->rb_right;
279219820Sjeff		else
280219820Sjeff			return attr_list;
281219820Sjeff	}
282219820Sjeff	return NULL;
283219820Sjeff}
284219820Sjeff
285219820Sjeffstatic int insert_attr(struct rb_root *root, unsigned long update_id, void *key,
286219820Sjeff		       struct ib_sa_iterator *iter)
287219820Sjeff{
288219820Sjeff	struct ib_sa_attr_list *attr_list;
289219820Sjeff	void *err;
290219820Sjeff
291219820Sjeff	write_lock_irq(&rwlock);
292219820Sjeff	attr_list = find_attr_list(root, key);
293219820Sjeff	if (!attr_list) {
294219820Sjeff		write_unlock_irq(&rwlock);
295219820Sjeff		attr_list = kmalloc(sizeof *attr_list, GFP_KERNEL);
296219820Sjeff		if (!attr_list)
297219820Sjeff			return -ENOMEM;
298219820Sjeff
299219820Sjeff		attr_list->iter.next = NULL;
300219820Sjeff		attr_list->tail = &attr_list->iter;
301219820Sjeff		attr_list->update_id = update_id;
302219820Sjeff		memcpy(attr_list->gid.raw, key, sizeof attr_list->gid);
303219820Sjeff
304219820Sjeff		write_lock_irq(&rwlock);
305219820Sjeff		err = insert_attr_list(root, attr_list);
306219820Sjeff		if (err) {
307219820Sjeff			write_unlock_irq(&rwlock);
308219820Sjeff			kfree(attr_list);
309219820Sjeff			return PTR_ERR(err);
310219820Sjeff		}
311219820Sjeff	} else if (attr_list->update_id != update_id) {
312219820Sjeff		free_attr_list(attr_list);
313219820Sjeff		attr_list->update_id = update_id;
314219820Sjeff	}
315219820Sjeff
316219820Sjeff	attr_list->tail->next = iter;
317219820Sjeff	iter->next = NULL;
318219820Sjeff	attr_list->tail = iter;
319219820Sjeff	write_unlock_irq(&rwlock);
320219820Sjeff	return 0;
321219820Sjeff}
322219820Sjeff
323219820Sjeffstatic struct ib_sa_mad_iter *ib_sa_iter_create(struct ib_mad_recv_wc *mad_recv_wc)
324219820Sjeff{
325219820Sjeff	struct ib_sa_mad_iter *iter;
326219820Sjeff	struct ib_sa_mad *mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad;
327219820Sjeff	int attr_size, attr_offset;
328219820Sjeff
329219820Sjeff	attr_offset = be16_to_cpu(mad->sa_hdr.attr_offset) * 8;
330219820Sjeff	attr_size = 64;		/* path record length */
331219820Sjeff	if (attr_offset < attr_size)
332219820Sjeff		return ERR_PTR(-EINVAL);
333219820Sjeff
334219820Sjeff	iter = kzalloc(sizeof *iter + attr_size, GFP_KERNEL);
335219820Sjeff	if (!iter)
336219820Sjeff		return ERR_PTR(-ENOMEM);
337219820Sjeff
338219820Sjeff	iter->data_left = mad_recv_wc->mad_len - IB_MGMT_SA_HDR;
339219820Sjeff	iter->recv_wc = mad_recv_wc;
340219820Sjeff	iter->recv_buf = &mad_recv_wc->recv_buf;
341219820Sjeff	iter->attr_offset = attr_offset;
342219820Sjeff	iter->attr_size = attr_size;
343219820Sjeff	return iter;
344219820Sjeff}
345219820Sjeff
/* Release an iterator obtained from ib_sa_iter_create(). */
static void ib_sa_iter_free(struct ib_sa_mad_iter *iter)
{
	kfree(iter);
}
350219820Sjeff
351219820Sjeffstatic void *ib_sa_iter_next(struct ib_sa_mad_iter *iter)
352219820Sjeff{
353219820Sjeff	struct ib_sa_mad *mad;
354219820Sjeff	int left, offset = 0;
355219820Sjeff
356219820Sjeff	while (iter->data_left >= iter->attr_offset) {
357219820Sjeff		while (iter->data_offset < IB_MGMT_SA_DATA) {
358219820Sjeff			mad = (struct ib_sa_mad *) iter->recv_buf->mad;
359219820Sjeff
360219820Sjeff			left = IB_MGMT_SA_DATA - iter->data_offset;
361219820Sjeff			if (left < iter->attr_size) {
362219820Sjeff				/* copy first piece of the attribute */
363219820Sjeff				iter->attr = &iter->attr_data;
364219820Sjeff				memcpy(iter->attr,
365219820Sjeff				       &mad->data[iter->data_offset], left);
366219820Sjeff				offset = left;
367219820Sjeff				break;
368219820Sjeff			} else if (offset) {
369219820Sjeff				/* copy the second piece of the attribute */
370219820Sjeff				memcpy(iter->attr + offset, &mad->data[0],
371219820Sjeff				       iter->attr_size - offset);
372219820Sjeff				iter->data_offset = iter->attr_size - offset;
373219820Sjeff				offset = 0;
374219820Sjeff			} else {
375219820Sjeff				iter->attr = &mad->data[iter->data_offset];
376219820Sjeff				iter->data_offset += iter->attr_size;
377219820Sjeff			}
378219820Sjeff
379219820Sjeff			iter->data_left -= iter->attr_offset;
380219820Sjeff			goto out;
381219820Sjeff		}
382219820Sjeff		iter->data_offset = 0;
383219820Sjeff		iter->recv_buf = list_entry(iter->recv_buf->list.next,
384219820Sjeff					    struct ib_mad_recv_buf, list);
385219820Sjeff	}
386219820Sjeff	iter->attr = NULL;
387219820Sjeffout:
388219820Sjeff	return iter->attr;
389219820Sjeff}
390219820Sjeff
391219820Sjeff/*
392219820Sjeff * Copy path records from a received response and insert them into our cache.
393219820Sjeff * A path record in the MADs are in network order, packed, and may
394219820Sjeff * span multiple MAD buffers, just to make our life hard.
395219820Sjeff */
396219820Sjeffstatic void update_path_db(struct sa_db_port *port,
397219820Sjeff			   struct ib_mad_recv_wc *mad_recv_wc,
398219820Sjeff			   enum sa_update_type type)
399219820Sjeff{
400219820Sjeff	struct ib_sa_mad_iter *iter;
401219820Sjeff	struct ib_path_rec_info *path_info;
402219820Sjeff	void *attr;
403219820Sjeff	int ret;
404219820Sjeff
405219820Sjeff	iter = ib_sa_iter_create(mad_recv_wc);
406219820Sjeff	if (IS_ERR(iter))
407219820Sjeff		return;
408219820Sjeff
409219820Sjeff	port->update_id += (type == SA_UPDATE_FULL);
410219820Sjeff
411219820Sjeff	while ((attr = ib_sa_iter_next(iter)) &&
412219820Sjeff	       (path_info = kmalloc(sizeof *path_info, GFP_KERNEL))) {
413219820Sjeff
414219820Sjeff		ib_sa_unpack_attr(&path_info->rec, attr, IB_SA_ATTR_PATH_REC);
415219820Sjeff
416219820Sjeff		ret = insert_attr(&port->paths, port->update_id,
417219820Sjeff				  path_info->rec.dgid.raw, &path_info->iter);
418219820Sjeff		if (ret) {
419219820Sjeff			kfree(path_info);
420219820Sjeff			break;
421219820Sjeff		}
422219820Sjeff	}
423219820Sjeff	ib_sa_iter_free(iter);
424219820Sjeff
425219820Sjeff	if (type == SA_UPDATE_FULL)
426219820Sjeff		remove_old_attrs(&port->paths, port->update_id);
427219820Sjeff}
428219820Sjeff
429219820Sjeffstatic struct ib_mad_send_buf *get_sa_msg(struct sa_db_port *port,
430219820Sjeff					  struct update_info *update)
431219820Sjeff{
432219820Sjeff	struct ib_ah_attr ah_attr;
433219820Sjeff	struct ib_mad_send_buf *msg;
434219820Sjeff
435219820Sjeff	msg = ib_create_send_mad(port->agent, 1, 0, 0, IB_MGMT_SA_HDR,
436219820Sjeff				 IB_MGMT_SA_DATA, GFP_KERNEL);
437219820Sjeff	if (IS_ERR(msg))
438219820Sjeff		return NULL;
439219820Sjeff
440219820Sjeff	memset(&ah_attr, 0, sizeof ah_attr);
441219820Sjeff	ah_attr.dlid = port->sm_lid;
442219820Sjeff	ah_attr.sl = port->sm_sl;
443219820Sjeff	ah_attr.port_num = port->port_num;
444219820Sjeff
445219820Sjeff	msg->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
446219820Sjeff	if (IS_ERR(msg->ah)) {
447219820Sjeff		ib_free_send_mad(msg);
448219820Sjeff		return NULL;
449219820Sjeff	}
450219820Sjeff
451219820Sjeff	msg->timeout_ms = retry_timer;
452219820Sjeff	msg->retries = 0;
453219820Sjeff	msg->context[0] = port;
454219820Sjeff	msg->context[1] = update;
455219820Sjeff	return msg;
456219820Sjeff}
457219820Sjeff
458219820Sjeffstatic __be64 form_tid(u32 hi_tid)
459219820Sjeff{
460219820Sjeff	static atomic_t tid;
461219820Sjeff	return cpu_to_be64((((u64) hi_tid) << 32) |
462219820Sjeff			   ((u32) atomic_inc_return(&tid)));
463219820Sjeff}
464219820Sjeff
465219820Sjeffstatic void format_path_req(struct sa_db_port *port,
466219820Sjeff			    struct update_info *update,
467219820Sjeff			    struct ib_mad_send_buf *msg)
468219820Sjeff{
469219820Sjeff	struct ib_sa_mad *mad = msg->mad;
470219820Sjeff	struct ib_sa_path_rec path_rec;
471219820Sjeff
472219820Sjeff	mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
473219820Sjeff	mad->mad_hdr.mgmt_class	   = IB_MGMT_CLASS_SUBN_ADM;
474219820Sjeff	mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
475219820Sjeff	mad->mad_hdr.method	   = IB_SA_METHOD_GET_TABLE;
476219820Sjeff	mad->mad_hdr.attr_id	   = cpu_to_be16(IB_SA_ATTR_PATH_REC);
477219820Sjeff	mad->mad_hdr.tid	   = form_tid(msg->mad_agent->hi_tid);
478219820Sjeff
479219820Sjeff	mad->sa_hdr.comp_mask = IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH;
480219820Sjeff
481219820Sjeff	path_rec.sgid = port->gid;
482219820Sjeff	path_rec.numb_path = (u8) paths_per_dest;
483219820Sjeff
484219820Sjeff	if (update->type == SA_UPDATE_ADD) {
485219820Sjeff		mad->sa_hdr.comp_mask |= IB_SA_PATH_REC_DGID;
486219820Sjeff		memcpy(&path_rec.dgid, &update->gid, sizeof path_rec.dgid);
487219820Sjeff	}
488219820Sjeff
489219820Sjeff	ib_sa_pack_attr(mad->data, &path_rec, IB_SA_ATTR_PATH_REC);
490219820Sjeff}
491219820Sjeff
492219820Sjeffstatic int send_query(struct sa_db_port *port,
493219820Sjeff		      struct update_info *update)
494219820Sjeff{
495219820Sjeff	int ret;
496219820Sjeff
497219820Sjeff	port->msg = get_sa_msg(port, update);
498219820Sjeff	if (!port->msg)
499219820Sjeff		return -ENOMEM;
500219820Sjeff
501219820Sjeff	format_path_req(port, update, port->msg);
502219820Sjeff
503219820Sjeff	ret = ib_post_send_mad(port->msg, NULL);
504219820Sjeff	if (ret)
505219820Sjeff		goto err;
506219820Sjeff
507219820Sjeff	return 0;
508219820Sjeff
509219820Sjefferr:
510219820Sjeff	ib_destroy_ah(port->msg->ah);
511219820Sjeff	ib_free_send_mad(port->msg);
512219820Sjeff	return ret;
513219820Sjeff}
514219820Sjeff
515219820Sjeffstatic void add_update(struct sa_db_port *port, u8 *gid,
516219820Sjeff		       enum sa_update_type type)
517219820Sjeff{
518219820Sjeff	struct update_info *update;
519219820Sjeff
520219820Sjeff	update = kmalloc(sizeof *update, GFP_KERNEL);
521219820Sjeff	if (update) {
522219820Sjeff		if (gid)
523219820Sjeff			memcpy(&update->gid, gid, sizeof update->gid);
524219820Sjeff		update->type = type;
525219820Sjeff		list_add(&update->list, &port->update_list);
526219820Sjeff	}
527219820Sjeff
528219820Sjeff	if (port->state == SA_DB_IDLE) {
529219820Sjeff		port->state = SA_DB_REFRESH;
530219820Sjeff		process_updates(port);
531219820Sjeff	}
532219820Sjeff}
533219820Sjeff
534219820Sjeffstatic void clean_update_list(struct sa_db_port *port)
535219820Sjeff{
536219820Sjeff	struct update_info *update;
537219820Sjeff
538219820Sjeff	while (!list_empty(&port->update_list)) {
539219820Sjeff		update = list_entry(port->update_list.next,
540219820Sjeff				    struct update_info, list);
541219820Sjeff		list_del(&update->list);
542219820Sjeff		kfree(update);
543219820Sjeff	}
544219820Sjeff}
545219820Sjeff
546219820Sjeffstatic int notice_handler(int status, struct ib_inform_info *info,
547219820Sjeff			  struct ib_sa_notice *notice)
548219820Sjeff{
549219820Sjeff	struct sa_db_port *port = info->context;
550219820Sjeff	struct ib_sa_notice_data_gid *gid_data;
551219820Sjeff	struct ib_inform_info **pinfo;
552219820Sjeff	enum sa_update_type type;
553219820Sjeff
554219820Sjeff	if (info->trap_number == IB_SA_SM_TRAP_GID_IN_SERVICE) {
555219820Sjeff		pinfo = &port->in_info;
556219820Sjeff		type = SA_UPDATE_ADD;
557219820Sjeff	} else {
558219820Sjeff		pinfo = &port->out_info;
559219820Sjeff		type = SA_UPDATE_REMOVE;
560219820Sjeff	}
561219820Sjeff
562219820Sjeff	mutex_lock(&lock);
563219820Sjeff	if (port->state == SA_DB_DESTROY || !*pinfo) {
564219820Sjeff		mutex_unlock(&lock);
565219820Sjeff		return 0;
566219820Sjeff	}
567219820Sjeff
568219820Sjeff	if (notice) {
569219820Sjeff		gid_data = (struct ib_sa_notice_data_gid *)
570219820Sjeff			   &notice->data_details;
571219820Sjeff		add_update(port, gid_data->gid, type);
572219820Sjeff		mutex_unlock(&lock);
573219820Sjeff	} else if (status == -ENETRESET) {
574219820Sjeff		*pinfo = NULL;
575219820Sjeff		mutex_unlock(&lock);
576219820Sjeff	} else {
577219820Sjeff		if (status)
578219820Sjeff			*pinfo = ERR_PTR(-EINVAL);
579219820Sjeff		port->state = SA_DB_IDLE;
580219820Sjeff		clean_update_list(port);
581219820Sjeff		mutex_unlock(&lock);
582219820Sjeff		queue_work(sa_wq, &port->work);
583219820Sjeff	}
584219820Sjeff
585219820Sjeff	return status;
586219820Sjeff}
587219820Sjeff
588219820Sjeffstatic int reg_in_info(struct sa_db_port *port)
589219820Sjeff{
590219820Sjeff	int ret = 0;
591219820Sjeff
592219820Sjeff	port->in_info = ib_sa_register_inform_info(&sa_client,
593219820Sjeff						   port->dev->device,
594219820Sjeff						   port->port_num,
595219820Sjeff						   IB_SA_SM_TRAP_GID_IN_SERVICE,
596219820Sjeff						   GFP_KERNEL, notice_handler,
597219820Sjeff						   port);
598219820Sjeff	if (IS_ERR(port->in_info))
599219820Sjeff		ret = PTR_ERR(port->in_info);
600219820Sjeff
601219820Sjeff	return ret;
602219820Sjeff}
603219820Sjeff
604219820Sjeffstatic int reg_out_info(struct sa_db_port *port)
605219820Sjeff{
606219820Sjeff	int ret = 0;
607219820Sjeff
608219820Sjeff	port->out_info = ib_sa_register_inform_info(&sa_client,
609219820Sjeff						    port->dev->device,
610219820Sjeff						    port->port_num,
611219820Sjeff						    IB_SA_SM_TRAP_GID_OUT_OF_SERVICE,
612219820Sjeff						    GFP_KERNEL, notice_handler,
613219820Sjeff						    port);
614219820Sjeff	if (IS_ERR(port->out_info))
615219820Sjeff		ret = PTR_ERR(port->out_info);
616219820Sjeff
617219820Sjeff	return ret;
618219820Sjeff}
619219820Sjeff
620219820Sjeffstatic void unsubscribe_port(struct sa_db_port *port)
621219820Sjeff{
622219820Sjeff	if (port->in_info && !IS_ERR(port->in_info))
623219820Sjeff		ib_sa_unregister_inform_info(port->in_info);
624219820Sjeff
625219820Sjeff	if (port->out_info && !IS_ERR(port->out_info))
626219820Sjeff		ib_sa_unregister_inform_info(port->out_info);
627219820Sjeff
628219820Sjeff	port->out_info = NULL;
629219820Sjeff	port->in_info = NULL;
630219820Sjeff
631219820Sjeff}
632219820Sjeff
633219820Sjeffstatic void cleanup_port(struct sa_db_port *port)
634219820Sjeff{
635219820Sjeff	unsubscribe_port(port);
636219820Sjeff
637219820Sjeff	clean_update_list(port);
638219820Sjeff	remove_all_attrs(&port->paths);
639219820Sjeff}
640219820Sjeff
641219820Sjeffstatic int update_port_info(struct sa_db_port *port)
642219820Sjeff{
643219820Sjeff	struct ib_port_attr port_attr;
644219820Sjeff	int ret;
645219820Sjeff
646219820Sjeff	ret = ib_query_port(port->dev->device, port->port_num, &port_attr);
647219820Sjeff	if (ret)
648219820Sjeff		return ret;
649219820Sjeff
650219820Sjeff	if (port_attr.state != IB_PORT_ACTIVE)
651219820Sjeff		return -ENODATA;
652219820Sjeff
653219820Sjeff        port->sm_lid = port_attr.sm_lid;
654219820Sjeff	port->sm_sl = port_attr.sm_sl;
655219820Sjeff	return 0;
656219820Sjeff}
657219820Sjeff
658219820Sjeffstatic void process_updates(struct sa_db_port *port)
659219820Sjeff{
660219820Sjeff	struct update_info *update;
661219820Sjeff	struct ib_sa_attr_list *attr_list;
662219820Sjeff	int ret;
663219820Sjeff
664219820Sjeff	if (!paths_per_dest || update_port_info(port)) {
665219820Sjeff		cleanup_port(port);
666219820Sjeff		goto out;
667219820Sjeff	}
668219820Sjeff
669219820Sjeff	/* Event registration is an optimization, so ignore failures. */
670219820Sjeff	if (subscribe_inform_info) {
671219820Sjeff		if (!port->out_info) {
672219820Sjeff			ret = reg_out_info(port);
673219820Sjeff			if (!ret)
674219820Sjeff				return;
675219820Sjeff		}
676219820Sjeff
677219820Sjeff		if (!port->in_info) {
678219820Sjeff			ret = reg_in_info(port);
679219820Sjeff			if (!ret)
680219820Sjeff				return;
681219820Sjeff		}
682219820Sjeff	} else
683219820Sjeff		unsubscribe_port(port);
684219820Sjeff
685219820Sjeff	while (!list_empty(&port->update_list)) {
686219820Sjeff		update = list_entry(port->update_list.next,
687219820Sjeff				    struct update_info, list);
688219820Sjeff
689219820Sjeff		if (update->type == SA_UPDATE_REMOVE) {
690219820Sjeff			write_lock_irq(&rwlock);
691219820Sjeff			attr_list = find_attr_list(&port->paths,
692219820Sjeff						   update->gid.raw);
693219820Sjeff			if (attr_list)
694219820Sjeff				remove_attr(&port->paths, attr_list);
695219820Sjeff			write_unlock_irq(&rwlock);
696219820Sjeff		} else {
697219820Sjeff			ret = send_query(port, update);
698219820Sjeff			if (!ret)
699219820Sjeff				return;
700219820Sjeff
701219820Sjeff		}
702219820Sjeff		list_del(&update->list);
703219820Sjeff		kfree(update);
704219820Sjeff	}
705219820Sjeffout:
706219820Sjeff	port->state = SA_DB_IDLE;
707219820Sjeff}
708219820Sjeff
709219820Sjeffstatic void refresh_port_db(struct sa_db_port *port)
710219820Sjeff{
711219820Sjeff	if (port->state == SA_DB_DESTROY)
712219820Sjeff		return;
713219820Sjeff
714219820Sjeff	if (port->state == SA_DB_REFRESH) {
715219820Sjeff		clean_update_list(port);
716219820Sjeff		ib_cancel_mad(port->agent, port->msg);
717219820Sjeff	}
718219820Sjeff
719219820Sjeff	add_update(port, NULL, SA_UPDATE_FULL);
720219820Sjeff}
721219820Sjeff
722219820Sjeffstatic void refresh_dev_db(struct sa_db_device *dev)
723219820Sjeff{
724219820Sjeff	int i;
725219820Sjeff
726219820Sjeff	for (i = 0; i < dev->port_count; i++)
727219820Sjeff		refresh_port_db(&dev->port[i]);
728219820Sjeff}
729219820Sjeff
730219820Sjeffstatic void refresh_db(void)
731219820Sjeff{
732219820Sjeff	struct sa_db_device *dev;
733219820Sjeff
734219820Sjeff	list_for_each_entry(dev, &dev_list, list)
735219820Sjeff		refresh_dev_db(dev);
736219820Sjeff}
737219820Sjeff
738219820Sjeffstatic int do_refresh(const char *val, struct kernel_param *kp)
739219820Sjeff{
740219820Sjeff	mutex_lock(&lock);
741219820Sjeff	refresh_db();
742219820Sjeff	mutex_unlock(&lock);
743219820Sjeff	return 0;
744219820Sjeff}
745219820Sjeff
746219820Sjeffstatic int get_lookup_method(char *buf, struct kernel_param *kp)
747219820Sjeff{
748219820Sjeff	return sprintf(buf,
749219820Sjeff		       "%c %d round robin\n"
750219820Sjeff		       "%c %d random",
751219820Sjeff		       (lookup_method == SA_DB_LOOKUP_LEAST_USED) ? '*' : ' ',
752219820Sjeff		       SA_DB_LOOKUP_LEAST_USED,
753219820Sjeff		       (lookup_method == SA_DB_LOOKUP_RANDOM) ? '*' : ' ',
754219820Sjeff		       SA_DB_LOOKUP_RANDOM);
755219820Sjeff}
756219820Sjeff
757219820Sjeffstatic int set_lookup_method(const char *val, struct kernel_param *kp)
758219820Sjeff{
759219820Sjeff	unsigned long method;
760219820Sjeff	int ret = 0;
761219820Sjeff
762219820Sjeff	method = simple_strtoul(val, NULL, 0);
763219820Sjeff
764219820Sjeff	switch (method) {
765219820Sjeff	case SA_DB_LOOKUP_LEAST_USED:
766219820Sjeff	case SA_DB_LOOKUP_RANDOM:
767219820Sjeff		lookup_method = method;
768219820Sjeff		break;
769219820Sjeff	default:
770219820Sjeff		ret = -EINVAL;
771219820Sjeff		break;
772219820Sjeff	}
773219820Sjeff
774219820Sjeff	return ret;
775219820Sjeff}
776219820Sjeff
777219820Sjeffstatic int set_paths_per_dest(const char *val, struct kernel_param *kp)
778219820Sjeff{
779219820Sjeff	int ret;
780219820Sjeff
781219820Sjeff	mutex_lock(&lock);
782219820Sjeff	ret = param_set_ulong(val, kp);
783219820Sjeff	if (ret)
784219820Sjeff		goto out;
785219820Sjeff
786219820Sjeff	if (paths_per_dest > SA_DB_MAX_PATHS_PER_DEST)
787219820Sjeff		paths_per_dest = SA_DB_MAX_PATHS_PER_DEST;
788219820Sjeff	refresh_db();
789219820Sjeffout:
790219820Sjeff	mutex_unlock(&lock);
791219820Sjeff	return ret;
792219820Sjeff}
793219820Sjeff
/*
 * Setter for "subscribe_inform_info": stores the boolean and, if that
 * succeeded, refreshes all devices so the new setting takes effect.
 */
static int set_subscribe_inform_info(const char *val, struct kernel_param *kp)
{
	int err = param_set_bool(val, kp);

	return err ? err : do_refresh(val, kp);
}
804219820Sjeff
805219820Sjeffstatic void port_work_handler(struct work_struct *work)
806219820Sjeff{
807219820Sjeff	struct sa_db_port *port;
808219820Sjeff
809219820Sjeff	port = container_of(work, typeof(*port), work);
810219820Sjeff	mutex_lock(&lock);
811219820Sjeff	refresh_port_db(port);
812219820Sjeff	mutex_unlock(&lock);
813219820Sjeff}
814219820Sjeff
815219820Sjeffstatic void handle_event(struct ib_event_handler *event_handler,
816219820Sjeff			 struct ib_event *event)
817219820Sjeff{
818219820Sjeff	struct sa_db_device *dev;
819219820Sjeff	struct sa_db_port *port;
820219820Sjeff
821219820Sjeff	dev = container_of(event_handler, typeof(*dev), event_handler);
822219820Sjeff	port = &dev->port[event->element.port_num - dev->start_port];
823219820Sjeff
824219820Sjeff	switch (event->event) {
825219820Sjeff	case IB_EVENT_PORT_ERR:
826219820Sjeff	case IB_EVENT_LID_CHANGE:
827219820Sjeff	case IB_EVENT_SM_CHANGE:
828219820Sjeff	case IB_EVENT_CLIENT_REREGISTER:
829219820Sjeff	case IB_EVENT_PKEY_CHANGE:
830219820Sjeff	case IB_EVENT_PORT_ACTIVE:
831219820Sjeff		queue_work(sa_wq, &port->work);
832219820Sjeff		break;
833219820Sjeff	default:
834219820Sjeff		break;
835219820Sjeff	}
836219820Sjeff}
837219820Sjeff
838219820Sjeffstatic void ib_free_path_iter(struct ib_sa_attr_iter *iter)
839219820Sjeff{
840219820Sjeff	read_unlock_irqrestore(&rwlock, iter->flags);
841219820Sjeff}
842219820Sjeff
843219820Sjeffstatic int ib_create_path_iter(struct ib_device *device, u8 port_num,
844219820Sjeff			       union ib_gid *dgid, struct ib_sa_attr_iter *iter)
845219820Sjeff{
846219820Sjeff	struct sa_db_device *dev;
847219820Sjeff	struct sa_db_port *port;
848219820Sjeff	struct ib_sa_attr_list *list;
849219820Sjeff
850219820Sjeff	dev = ib_get_client_data(device, &sa_db_client);
851219820Sjeff	if (!dev)
852219820Sjeff		return -ENODEV;
853219820Sjeff
854219820Sjeff	port = &dev->port[port_num - dev->start_port];
855219820Sjeff
856219820Sjeff	read_lock_irqsave(&rwlock, iter->flags);
857219820Sjeff	list = find_attr_list(&port->paths, dgid->raw);
858219820Sjeff	if (!list) {
859219820Sjeff		ib_free_path_iter(iter);
860219820Sjeff		return -ENODATA;
861219820Sjeff	}
862219820Sjeff
863219820Sjeff	iter->iter = &list->iter;
864219820Sjeff	return 0;
865219820Sjeff}
866219820Sjeff
867219820Sjeffstatic struct ib_sa_path_rec *ib_get_next_path(struct ib_sa_attr_iter *iter)
868219820Sjeff{
869219820Sjeff	struct ib_path_rec_info *next_path;
870219820Sjeff
871219820Sjeff	iter->iter = iter->iter->next;
872219820Sjeff	if (iter->iter) {
873219820Sjeff		next_path = container_of(iter->iter, struct ib_path_rec_info, iter);
874219820Sjeff		return &next_path->rec;
875219820Sjeff	} else
876219820Sjeff		return NULL;
877219820Sjeff}
878219820Sjeff
879219820Sjeffstatic int cmp_rec(struct ib_sa_path_rec *src,
880219820Sjeff		   struct ib_sa_path_rec *dst, ib_sa_comp_mask comp_mask)
881219820Sjeff{
882219820Sjeff	/* DGID check already done */
883219820Sjeff	if (comp_mask & IB_SA_PATH_REC_SGID &&
884219820Sjeff	    memcmp(&src->sgid, &dst->sgid, sizeof src->sgid))
885219820Sjeff		return -EINVAL;
886219820Sjeff	if (comp_mask & IB_SA_PATH_REC_DLID && src->dlid != dst->dlid)
887219820Sjeff		return -EINVAL;
888219820Sjeff	if (comp_mask & IB_SA_PATH_REC_SLID && src->slid != dst->slid)
889219820Sjeff		return -EINVAL;
890219820Sjeff	if (comp_mask & IB_SA_PATH_REC_RAW_TRAFFIC &&
891219820Sjeff	    src->raw_traffic != dst->raw_traffic)
892219820Sjeff		return -EINVAL;
893219820Sjeff
894219820Sjeff	if (comp_mask & IB_SA_PATH_REC_FLOW_LABEL &&
895219820Sjeff	    src->flow_label != dst->flow_label)
896219820Sjeff		return -EINVAL;
897219820Sjeff	if (comp_mask & IB_SA_PATH_REC_HOP_LIMIT &&
898219820Sjeff	    src->hop_limit != dst->hop_limit)
899219820Sjeff		return -EINVAL;
900219820Sjeff	if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS &&
901219820Sjeff	    src->traffic_class != dst->traffic_class)
902219820Sjeff		return -EINVAL;
903219820Sjeff	if (comp_mask & IB_SA_PATH_REC_REVERSIBLE &&
904219820Sjeff	    dst->reversible && !src->reversible)
905219820Sjeff		return -EINVAL;
906219820Sjeff	/* Numb path check already done */
907219820Sjeff	if (comp_mask & IB_SA_PATH_REC_PKEY && src->pkey != dst->pkey)
908219820Sjeff		return -EINVAL;
909219820Sjeff
910219820Sjeff	if (comp_mask & IB_SA_PATH_REC_SL && src->sl != dst->sl)
911219820Sjeff		return -EINVAL;
912219820Sjeff
913219820Sjeff	if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_MTU_SELECTOR,
914219820Sjeff				 IB_SA_PATH_REC_MTU, dst->mtu_selector,
915219820Sjeff				 src->mtu, dst->mtu))
916219820Sjeff		return -EINVAL;
917219820Sjeff	if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_RATE_SELECTOR,
918219820Sjeff				 IB_SA_PATH_REC_RATE, dst->rate_selector,
919219820Sjeff				 src->rate, dst->rate))
920219820Sjeff		return -EINVAL;
921219820Sjeff	if (ib_sa_check_selector(comp_mask,
922219820Sjeff				 IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR,
923219820Sjeff				 IB_SA_PATH_REC_PACKET_LIFE_TIME,
924219820Sjeff				 dst->packet_life_time_selector,
925219820Sjeff				 src->packet_life_time, dst->packet_life_time))
926219820Sjeff		return -EINVAL;
927219820Sjeff
928219820Sjeff	return 0;
929219820Sjeff}
930219820Sjeff
931219820Sjeffstatic struct ib_sa_path_rec *get_random_path(struct ib_sa_attr_iter *iter,
932219820Sjeff					      struct ib_sa_path_rec *req_path,
933219820Sjeff					      ib_sa_comp_mask comp_mask)
934219820Sjeff{
935219820Sjeff	struct ib_sa_path_rec *path, *rand_path = NULL;
936219820Sjeff	int num, count = 0;
937219820Sjeff
938219820Sjeff	for (path = ib_get_next_path(iter); path;
939219820Sjeff	     path = ib_get_next_path(iter)) {
940219820Sjeff		if (!cmp_rec(path, req_path, comp_mask)) {
941219820Sjeff			get_random_bytes(&num, sizeof num);
942219820Sjeff			if ((num % ++count) == 0)
943219820Sjeff				rand_path = path;
944219820Sjeff		}
945219820Sjeff	}
946219820Sjeff
947219820Sjeff	return rand_path;
948219820Sjeff}
949219820Sjeff
950219820Sjeffstatic struct ib_sa_path_rec *get_next_path(struct ib_sa_attr_iter *iter,
951219820Sjeff					    struct ib_sa_path_rec *req_path,
952219820Sjeff					    ib_sa_comp_mask comp_mask)
953219820Sjeff{
954219820Sjeff	struct ib_path_rec_info *cur_path, *next_path = NULL;
955219820Sjeff	struct ib_sa_path_rec *path;
956219820Sjeff	unsigned long lookups = ~0;
957219820Sjeff
958219820Sjeff	for (path = ib_get_next_path(iter); path;
959219820Sjeff	     path = ib_get_next_path(iter)) {
960219820Sjeff		if (!cmp_rec(path, req_path, comp_mask)) {
961219820Sjeff
962219820Sjeff			cur_path = container_of(iter->iter, struct ib_path_rec_info,
963219820Sjeff						iter);
964219820Sjeff			if (cur_path->lookups < lookups) {
965219820Sjeff				lookups = cur_path->lookups;
966219820Sjeff				next_path = cur_path;
967219820Sjeff			}
968219820Sjeff		}
969219820Sjeff	}
970219820Sjeff
971219820Sjeff	if (next_path) {
972219820Sjeff		next_path->lookups++;
973219820Sjeff		return &next_path->rec;
974219820Sjeff	} else
975219820Sjeff		return NULL;
976219820Sjeff}
977219820Sjeff
978219820Sjeffstatic void report_path(struct work_struct *work)
979219820Sjeff{
980219820Sjeff	struct sa_path_request *req;
981219820Sjeff
982219820Sjeff	req = container_of(work, struct sa_path_request, work);
983219820Sjeff	req->callback(0, &req->path_rec, req->context);
984219820Sjeff	ib_sa_client_put(req->client);
985219820Sjeff	kfree(req);
986219820Sjeff}
987219820Sjeff
988219820Sjeff/**
989219820Sjeff * ib_sa_path_rec_get - Start a Path get query
990219820Sjeff * @client:SA client
991219820Sjeff * @device:device to send query on
992219820Sjeff * @port_num: port number to send query on
993219820Sjeff * @rec:Path Record to send in query
994219820Sjeff * @comp_mask:component mask to send in query
995219820Sjeff * @timeout_ms:time to wait for response
996219820Sjeff * @gfp_mask:GFP mask to use for internal allocations
997219820Sjeff * @callback:function called when query completes, times out or is
998219820Sjeff * canceled
999219820Sjeff * @context:opaque user context passed to callback
1000219820Sjeff * @sa_query:query context, used to cancel query
1001219820Sjeff *
1002219820Sjeff * Send a Path Record Get query to the SA to look up a path.  The
1003219820Sjeff * callback function will be called when the query completes (or
1004219820Sjeff * fails); status is 0 for a successful response, -EINTR if the query
1005219820Sjeff * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
1006219820Sjeff * occurred sending the query.  The resp parameter of the callback is
1007219820Sjeff * only valid if status is 0.
1008219820Sjeff *
1009219820Sjeff * If the return value of ib_sa_path_rec_get() is negative, it is an
1010219820Sjeff * error code.  Otherwise it is a query ID that can be used to cancel
1011219820Sjeff * the query.
1012219820Sjeff */
1013219820Sjeffint ib_sa_path_rec_get(struct ib_sa_client *client,
1014219820Sjeff		       struct ib_device *device, u8 port_num,
1015219820Sjeff		       struct ib_sa_path_rec *rec,
1016219820Sjeff		       ib_sa_comp_mask comp_mask,
1017219820Sjeff		       int timeout_ms, gfp_t gfp_mask,
1018219820Sjeff		       void (*callback)(int status,
1019219820Sjeff					struct ib_sa_path_rec *resp,
1020219820Sjeff					void *context),
1021219820Sjeff		       void *context,
1022219820Sjeff		       struct ib_sa_query **sa_query)
1023219820Sjeff{
1024219820Sjeff	struct sa_path_request *req;
1025219820Sjeff	struct ib_sa_attr_iter iter;
1026219820Sjeff	struct ib_sa_path_rec *path_rec;
1027219820Sjeff	int ret;
1028219820Sjeff
1029219820Sjeff	if (!paths_per_dest)
1030219820Sjeff		goto query_sa;
1031219820Sjeff
1032219820Sjeff	if (!(comp_mask & IB_SA_PATH_REC_DGID) ||
1033219820Sjeff	    !(comp_mask & IB_SA_PATH_REC_NUMB_PATH) || rec->numb_path != 1)
1034219820Sjeff		goto query_sa;
1035219820Sjeff
1036219820Sjeff	req = kmalloc(sizeof *req, gfp_mask);
1037219820Sjeff	if (!req)
1038219820Sjeff		goto query_sa;
1039219820Sjeff
1040219820Sjeff	ret = ib_create_path_iter(device, port_num, &rec->dgid, &iter);
1041219820Sjeff	if (ret)
1042219820Sjeff		goto free_req;
1043219820Sjeff
1044219820Sjeff	if (lookup_method == SA_DB_LOOKUP_RANDOM)
1045219820Sjeff		path_rec = get_random_path(&iter, rec, comp_mask);
1046219820Sjeff	else
1047219820Sjeff		path_rec = get_next_path(&iter, rec, comp_mask);
1048219820Sjeff
1049219820Sjeff	if (!path_rec)
1050219820Sjeff		goto free_iter;
1051219820Sjeff
1052219820Sjeff	memcpy(&req->path_rec, path_rec, sizeof *path_rec);
1053219820Sjeff	ib_free_path_iter(&iter);
1054219820Sjeff
1055219820Sjeff	INIT_WORK(&req->work, report_path);
1056219820Sjeff	req->client = client;
1057219820Sjeff	req->callback = callback;
1058219820Sjeff	req->context = context;
1059219820Sjeff
1060219820Sjeff	ib_sa_client_get(client);
1061219820Sjeff	queue_work(sa_wq, &req->work);
1062219820Sjeff	*sa_query = ERR_PTR(-EEXIST);
1063219820Sjeff	return 0;
1064219820Sjeff
1065219820Sjefffree_iter:
1066219820Sjeff	ib_free_path_iter(&iter);
1067219820Sjefffree_req:
1068219820Sjeff	kfree(req);
1069219820Sjeffquery_sa:
1070219820Sjeff	return ib_sa_path_rec_query(client, device, port_num, rec, comp_mask,
1071219820Sjeff				    timeout_ms, gfp_mask, callback, context,
1072219820Sjeff				    sa_query);
1073219820Sjeff}
1074219820SjeffEXPORT_SYMBOL(ib_sa_path_rec_get);
1075219820Sjeff
1076219820Sjeffstatic void recv_handler(struct ib_mad_agent *mad_agent,
1077219820Sjeff			 struct ib_mad_recv_wc *mad_recv_wc)
1078219820Sjeff{
1079219820Sjeff	struct sa_db_port *port;
1080219820Sjeff	struct update_info *update;
1081219820Sjeff	struct ib_mad_send_buf *msg;
1082219820Sjeff	enum sa_update_type type;
1083219820Sjeff
1084219820Sjeff	msg = (struct ib_mad_send_buf *) (unsigned long) mad_recv_wc->wc->wr_id;
1085219820Sjeff	port = msg->context[0];
1086219820Sjeff	update = msg->context[1];
1087219820Sjeff
1088219820Sjeff	mutex_lock(&lock);
1089219820Sjeff	if (port->state == SA_DB_DESTROY ||
1090219820Sjeff	    update != list_entry(port->update_list.next,
1091219820Sjeff				 struct update_info, list)) {
1092219820Sjeff		mutex_unlock(&lock);
1093219820Sjeff	} else {
1094219820Sjeff		type = update->type;
1095219820Sjeff		mutex_unlock(&lock);
1096219820Sjeff		update_path_db(mad_agent->context, mad_recv_wc, type);
1097219820Sjeff	}
1098219820Sjeff
1099219820Sjeff	ib_free_recv_mad(mad_recv_wc);
1100219820Sjeff}
1101219820Sjeff
1102219820Sjeffstatic void send_handler(struct ib_mad_agent *agent,
1103219820Sjeff			 struct ib_mad_send_wc *mad_send_wc)
1104219820Sjeff{
1105219820Sjeff	struct ib_mad_send_buf *msg;
1106219820Sjeff	struct sa_db_port *port;
1107219820Sjeff	struct update_info *update;
1108219820Sjeff	int ret;
1109219820Sjeff
1110219820Sjeff	msg = mad_send_wc->send_buf;
1111219820Sjeff	port = msg->context[0];
1112219820Sjeff	update = msg->context[1];
1113219820Sjeff
1114219820Sjeff	mutex_lock(&lock);
1115219820Sjeff	if (port->state == SA_DB_DESTROY)
1116219820Sjeff		goto unlock;
1117219820Sjeff
1118219820Sjeff	if (update == list_entry(port->update_list.next,
1119219820Sjeff				 struct update_info, list)) {
1120219820Sjeff
1121219820Sjeff		if (mad_send_wc->status == IB_WC_RESP_TIMEOUT_ERR &&
1122219820Sjeff		    msg->timeout_ms < SA_DB_MAX_RETRY_TIMER) {
1123219820Sjeff
1124219820Sjeff			msg->timeout_ms <<= 1;
1125219820Sjeff			ret = ib_post_send_mad(msg, NULL);
1126219820Sjeff			if (!ret) {
1127219820Sjeff				mutex_unlock(&lock);
1128219820Sjeff				return;
1129219820Sjeff			}
1130219820Sjeff		}
1131219820Sjeff		list_del(&update->list);
1132219820Sjeff		kfree(update);
1133219820Sjeff	}
1134219820Sjeff	process_updates(port);
1135219820Sjeffunlock:
1136219820Sjeff	mutex_unlock(&lock);
1137219820Sjeff
1138219820Sjeff	ib_destroy_ah(msg->ah);
1139219820Sjeff	ib_free_send_mad(msg);
1140219820Sjeff}
1141219820Sjeff
1142219820Sjeffstatic int init_port(struct sa_db_device *dev, int port_num)
1143219820Sjeff{
1144219820Sjeff	struct sa_db_port *port;
1145219820Sjeff	int ret;
1146219820Sjeff
1147219820Sjeff	port = &dev->port[port_num - dev->start_port];
1148219820Sjeff	port->dev = dev;
1149219820Sjeff	port->port_num = port_num;
1150219820Sjeff	INIT_WORK(&port->work, port_work_handler);
1151219820Sjeff	port->paths = RB_ROOT;
1152219820Sjeff	INIT_LIST_HEAD(&port->update_list);
1153219820Sjeff
1154219820Sjeff	ret = ib_get_cached_gid(dev->device, port_num, 0, &port->gid);
1155219820Sjeff	if (ret)
1156219820Sjeff		return ret;
1157219820Sjeff
1158219820Sjeff	port->agent = ib_register_mad_agent(dev->device, port_num, IB_QPT_GSI,
1159219820Sjeff					    NULL, IB_MGMT_RMPP_VERSION,
1160219820Sjeff					    send_handler, recv_handler, port);
1161219820Sjeff	if (IS_ERR(port->agent))
1162219820Sjeff		ret = PTR_ERR(port->agent);
1163219820Sjeff
1164219820Sjeff	return ret;
1165219820Sjeff}
1166219820Sjeff
1167219820Sjeffstatic void destroy_port(struct sa_db_port *port)
1168219820Sjeff{
1169219820Sjeff	mutex_lock(&lock);
1170219820Sjeff	port->state = SA_DB_DESTROY;
1171219820Sjeff	mutex_unlock(&lock);
1172219820Sjeff
1173219820Sjeff	ib_unregister_mad_agent(port->agent);
1174219820Sjeff	cleanup_port(port);
1175219820Sjeff	flush_workqueue(sa_wq);
1176219820Sjeff}
1177219820Sjeff
1178219820Sjeffstatic void sa_db_add_dev(struct ib_device *device)
1179219820Sjeff{
1180219820Sjeff	struct sa_db_device *dev;
1181219820Sjeff	struct sa_db_port *port;
1182219820Sjeff	int s, e, i, ret;
1183219820Sjeff
1184219820Sjeff	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
1185219820Sjeff		return;
1186219820Sjeff
1187219820Sjeff	if (device->node_type == RDMA_NODE_IB_SWITCH) {
1188219820Sjeff		s = e = 0;
1189219820Sjeff	} else {
1190219820Sjeff		s = 1;
1191219820Sjeff		e = device->phys_port_cnt;
1192219820Sjeff	}
1193219820Sjeff
1194219820Sjeff	dev = kzalloc(sizeof *dev + (e - s + 1) * sizeof *port, GFP_KERNEL);
1195219820Sjeff	if (!dev)
1196219820Sjeff		return;
1197219820Sjeff
1198219820Sjeff	dev->start_port = s;
1199219820Sjeff	dev->port_count = e - s + 1;
1200219820Sjeff	dev->device = device;
1201219820Sjeff	for (i = 0; i < dev->port_count; i++) {
1202219820Sjeff		ret = init_port(dev, s + i);
1203219820Sjeff		if (ret)
1204219820Sjeff			goto err;
1205219820Sjeff	}
1206219820Sjeff
1207219820Sjeff	ib_set_client_data(device, &sa_db_client, dev);
1208219820Sjeff
1209219820Sjeff	INIT_IB_EVENT_HANDLER(&dev->event_handler, device, handle_event);
1210219820Sjeff
1211219820Sjeff	mutex_lock(&lock);
1212219820Sjeff	list_add_tail(&dev->list, &dev_list);
1213219820Sjeff	refresh_dev_db(dev);
1214219820Sjeff	mutex_unlock(&lock);
1215219820Sjeff
1216219820Sjeff	ib_register_event_handler(&dev->event_handler);
1217219820Sjeff	return;
1218219820Sjefferr:
1219219820Sjeff	while (i--)
1220219820Sjeff		destroy_port(&dev->port[i]);
1221219820Sjeff	kfree(dev);
1222219820Sjeff}
1223219820Sjeff
1224219820Sjeffstatic void sa_db_remove_dev(struct ib_device *device)
1225219820Sjeff{
1226219820Sjeff	struct sa_db_device *dev;
1227219820Sjeff	int i;
1228219820Sjeff
1229219820Sjeff	dev = ib_get_client_data(device, &sa_db_client);
1230219820Sjeff	if (!dev)
1231219820Sjeff		return;
1232219820Sjeff
1233219820Sjeff	ib_unregister_event_handler(&dev->event_handler);
1234219820Sjeff	flush_workqueue(sa_wq);
1235219820Sjeff
1236219820Sjeff	for (i = 0; i < dev->port_count; i++)
1237219820Sjeff		destroy_port(&dev->port[i]);
1238219820Sjeff
1239219820Sjeff	mutex_lock(&lock);
1240219820Sjeff	list_del(&dev->list);
1241219820Sjeff	mutex_unlock(&lock);
1242219820Sjeff
1243219820Sjeff	kfree(dev);
1244219820Sjeff}
1245219820Sjeff
1246219820Sjeffint sa_db_init(void)
1247219820Sjeff{
1248219820Sjeff	int ret;
1249219820Sjeff
1250219820Sjeff	rwlock_init(&rwlock);
1251219820Sjeff	sa_wq = create_singlethread_workqueue("local_sa");
1252219820Sjeff	if (!sa_wq)
1253219820Sjeff		return -ENOMEM;
1254219820Sjeff
1255219820Sjeff	ib_sa_register_client(&sa_client);
1256219820Sjeff	ret = ib_register_client(&sa_db_client);
1257219820Sjeff	if (ret)
1258219820Sjeff		goto err;
1259219820Sjeff
1260219820Sjeff	return 0;
1261219820Sjeff
1262219820Sjefferr:
1263219820Sjeff	ib_sa_unregister_client(&sa_client);
1264219820Sjeff	destroy_workqueue(sa_wq);
1265219820Sjeff	return ret;
1266219820Sjeff}
1267219820Sjeff
1268219820Sjeffvoid sa_db_cleanup(void)
1269219820Sjeff{
1270219820Sjeff	ib_unregister_client(&sa_db_client);
1271219820Sjeff	ib_sa_unregister_client(&sa_client);
1272219820Sjeff	destroy_workqueue(sa_wq);
1273219820Sjeff}
1274