/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
 * Copyright (c) 2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: sa_query.c,v 1.1.1.1 2007/10/11 23:31:50 Exp $
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/random.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/idr.h>
#include <linux/workqueue.h>

#include <rdma/ib_pack.h>
#include <rdma/ib_cache.h>
#include "sa.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
MODULE_LICENSE("Dual BSD/GPL");
struct ib_sa_sm_ah {
	struct ib_ah        *ah;
	struct kref          ref;
	u8		     src_path_mask;
};

struct ib_sa_port {
	struct ib_mad_agent *agent;
	struct ib_sa_sm_ah  *sm_ah;
	struct work_struct   update_task;
	spinlock_t           ah_lock;
	u8                   port_num;
};

struct ib_sa_device {
	int                     start_port, end_port;
	struct ib_event_handler event_handler;
	struct ib_sa_port port[0];
};

struct ib_sa_query {
	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
	void (*release)(struct ib_sa_query *);
	struct ib_sa_client    *client;
	struct ib_sa_port      *port;
	struct ib_mad_send_buf *mad_buf;
	struct ib_sa_sm_ah     *sm_ah;
	int			id;
};

struct ib_sa_service_query {
	void (*callback)(int, struct ib_sa_service_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

struct ib_sa_path_query {
	void (*callback)(int, struct ib_sa_path_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

struct ib_sa_mcmember_query {
	void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

static void ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device);

static struct ib_client sa_client = {
	.name   = "sa",
	.add    = ib_sa_add_one,
	.remove = ib_sa_remove_one
};

static spinlock_t idr_lock;
static DEFINE_IDR(query_idr);

static spinlock_t tid_lock;
static u32 tid;

#define PATH_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_path_rec, field),		\
	.struct_size_bytes   = sizeof ((struct ib_sa_path_rec *) 0)->field,	\
	.field_name          = "sa_path_rec:" #field

static const struct ib_field path_rec_table[] = {
	{ RESERVED,
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ RESERVED,
	  .offset_words = 1,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ PATH_REC_FIELD(dgid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(sgid),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(dlid),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(slid),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(raw_traffic),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 11,
	  .offset_bits  = 1,
	  .size_bits    = 3 },
	{ PATH_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ PATH_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(traffic_class),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(reversible),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ PATH_REC_FIELD(numb_path),
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 7 },
	{ PATH_REC_FIELD(pkey),
	  .offset_words = 12,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ RESERVED,
	  .offset_words = 13,
	  .offset_bits  = 0,
	  .size_bits    = 12 },
	{ PATH_REC_FIELD(sl),
	  .offset_words = 13,
	  .offset_bits  = 12,
	  .size_bits    = 4 },
	{ PATH_REC_FIELD(mtu_selector),
	  .offset_words = 13,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(mtu),
	  .offset_words = 13,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(rate_selector),
	  .offset_words = 13,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(rate),
	  .offset_words = 13,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(packet_life_time_selector),
	  .offset_words = 14,
	  .offset_bits  = 0,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(packet_life_time),
	  .offset_words = 14,
	  .offset_bits  = 2,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(preference),
	  .offset_words = 14,
	  .offset_bits  = 8,
	  .size_bits    = 8 },
	{ RESERVED,
	  .offset_words = 14,
	  .offset_bits  = 16,
	  .size_bits    = 48 },
};

#define MCMEMBER_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),	\
	.struct_size_bytes   = sizeof ((struct ib_sa_mcmember_rec *) 0)->field,	\
	.field_name          = "sa_mcmember_rec:" #field

static const struct ib_field mcmember_rec_table[] = {
	{ MCMEMBER_REC_FIELD(mgid),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(port_gid),
	  .offset_words = 4,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(qkey),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ MCMEMBER_REC_FIELD(mlid),
	  .offset_words = 9,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(mtu_selector),
	  .offset_words = 9,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(mtu),
	  .offset_words = 9,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(traffic_class),
	  .offset_words = 9,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(pkey),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(rate_selector),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(rate),
	  .offset_words = 10,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(packet_life_time_selector),
	  .offset_words = 10,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(packet_life_time),
	  .offset_words = 10,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(sl),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ MCMEMBER_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(scope),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(join_state),
	  .offset_words = 12,
	  .offset_bits  = 4,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(proxy_join),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 23 },
};

#define SERVICE_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_service_rec, field),	\
	.struct_size_bytes   = sizeof ((struct ib_sa_service_rec *) 0)->field,	\
	.field_name          = "sa_service_rec:" #field

static const struct ib_field service_rec_table[] = {
	{ SERVICE_REC_FIELD(id),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 64 },
	{ SERVICE_REC_FIELD(gid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(pkey),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ SERVICE_REC_FIELD(lease),
	  .offset_words = 7,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ SERVICE_REC_FIELD(key),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(name),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 64*8 },
	{ SERVICE_REC_FIELD(data8),
	  .offset_words = 28,
	  .offset_bits  = 0,
	  .size_bits    = 16*8 },
	{ SERVICE_REC_FIELD(data16),
	  .offset_words = 32,
	  .offset_bits  = 0,
	  .size_bits    = 8*16 },
	{ SERVICE_REC_FIELD(data32),
	  .offset_words = 36,
	  .offset_bits  = 0,
	  .size_bits    = 4*32 },
	{ SERVICE_REC_FIELD(data64),
	  .offset_words = 40,
	  .offset_bits  = 0,
	  .size_bits    = 2*64 },
};
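
/*
 * The three tables above drive the generic ib_pack()/ib_unpack()
 * marshalling code: each entry maps one structure member to its wire
 * offset and width within the SA attribute.  A sketch of how this file
 * uses them (see ib_sa_path_rec_get() and its callback below); the
 * local variable names are illustrative only:
 *
 *	struct ib_sa_path_rec rec;
 *	u8 data[IB_MGMT_SA_DATA];
 *
 *	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), &rec, data);
 *	ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), data, &rec);
 */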

static void free_sm_ah(struct kref *kref)
{
	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);

	ib_destroy_ah(sm_ah->ah);
	kfree(sm_ah);
}

static void update_sm_ah(struct work_struct *work)
{
	struct ib_sa_port *port =
		container_of(work, struct ib_sa_port, update_task);
	struct ib_sa_sm_ah *new_ah, *old_ah;
	struct ib_port_attr port_attr;
	struct ib_ah_attr   ah_attr;

	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
		printk(KERN_WARNING "Couldn't query port\n");
		return;
	}

	new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
	if (!new_ah) {
		printk(KERN_WARNING "Couldn't allocate new SM AH\n");
		return;
	}

	kref_init(&new_ah->ref);
	new_ah->src_path_mask = (1 << port_attr.lmc) - 1;

	memset(&ah_attr, 0, sizeof ah_attr);
	ah_attr.dlid     = port_attr.sm_lid;
	ah_attr.sl       = port_attr.sm_sl;
	ah_attr.port_num = port->port_num;

	new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
	if (IS_ERR(new_ah->ah)) {
		printk(KERN_WARNING "Couldn't create new SM AH\n");
		kfree(new_ah);
		return;
	}

	spin_lock_irq(&port->ah_lock);
	old_ah = port->sm_ah;
	port->sm_ah = new_ah;
	spin_unlock_irq(&port->ah_lock);

	if (old_ah)
		kref_put(&old_ah->ref, free_sm_ah);
}

static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
{
	if (event->event == IB_EVENT_PORT_ERR    ||
	    event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_LID_CHANGE  ||
	    event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_SM_CHANGE   ||
	    event->event == IB_EVENT_CLIENT_REREGISTER) {
		struct ib_sa_device *sa_dev;
		sa_dev = container_of(handler, typeof(*sa_dev), event_handler);

		schedule_work(&sa_dev->port[event->element.port_num -
					    sa_dev->start_port].update_task);
	}
}

void ib_sa_register_client(struct ib_sa_client *client)
{
	atomic_set(&client->users, 1);
	init_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_register_client);

void ib_sa_unregister_client(struct ib_sa_client *client)
{
	ib_sa_client_put(client);
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_unregister_client);
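
/*
 * A minimal sketch of the client lifecycle the two calls above assume:
 * a consumer embeds a struct ib_sa_client, registers it once, passes
 * it to the query functions below, and unregisters it when done (which
 * blocks until every query holding a reference has completed).  The
 * name "my_sa_client" is illustrative only:
 *
 *	static struct ib_sa_client my_sa_client;
 *
 *	ib_sa_register_client(&my_sa_client);
 *	... issue ib_sa_path_rec_get()/ib_sa_*_query() calls ...
 *	ib_sa_unregister_client(&my_sa_client);
 */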

/**
 * ib_sa_cancel_query - try to cancel an SA query
 * @id:ID of query to cancel
 * @query:query pointer to cancel
 *
 * Try to cancel an SA query.  If the id and query don't match up or
 * the query has already completed, nothing is done.  Otherwise the
 * query is canceled and will complete with a status of -EINTR.
 */
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
	unsigned long flags;
	struct ib_mad_agent *agent;
	struct ib_mad_send_buf *mad_buf;

	spin_lock_irqsave(&idr_lock, flags);
	if (idr_find(&query_idr, id) != query) {
		spin_unlock_irqrestore(&idr_lock, flags);
		return;
	}
	agent = query->port->agent;
	mad_buf = query->mad_buf;
	spin_unlock_irqrestore(&idr_lock, flags);

	ib_cancel_mad(agent, mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
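
/*
 * Cancellation sketch (illustrative only): a successful query call
 * returns a non-negative id and fills in the query pointer through its
 * **sa_query argument; both are needed here.
 *
 *	struct ib_sa_query *query;
 *	int id;
 *
 *	id = ib_sa_path_rec_get(..., &query);
 *	if (id >= 0)
 *		ib_sa_cancel_query(id, query);
 *
 * The callback then runs with status -EINTR unless the query had
 * already completed.
 */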

static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
{
	struct ib_sa_device *sa_dev;
	struct ib_sa_port   *port;
	unsigned long flags;
	u8 src_path_mask;

	sa_dev = ib_get_client_data(device, &sa_client);
	if (!sa_dev)
		return 0x7f;

	port  = &sa_dev->port[port_num - sa_dev->start_port];
	spin_lock_irqsave(&port->ah_lock, flags);
	src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
	spin_unlock_irqrestore(&port->ah_lock, flags);

	return src_path_mask;
}

int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
			 struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
{
	int ret;
	u16 gid_index;

	memset(ah_attr, 0, sizeof *ah_attr);
	ah_attr->dlid = be16_to_cpu(rec->dlid);
	ah_attr->sl = rec->sl;
	ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
				 get_src_path_mask(device, port_num);
	ah_attr->port_num = port_num;
	ah_attr->static_rate = rec->rate;

	if (rec->hop_limit > 1) {
		ah_attr->ah_flags = IB_AH_GRH;
		ah_attr->grh.dgid = rec->dgid;

		ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
					 &gid_index);
		if (ret)
			return ret;

		ah_attr->grh.sgid_index    = gid_index;
		ah_attr->grh.flow_label    = be32_to_cpu(rec->flow_label);
		ah_attr->grh.hop_limit     = rec->hop_limit;
		ah_attr->grh.traffic_class = rec->traffic_class;
	}
	return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
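
/*
 * A sketch of turning a completed path record into an address handle,
 * e.g. from inside a path query callback.  The protection domain "pd"
 * and the error handling are the caller's; names other than the two
 * exported functions are illustrative:
 *
 *	struct ib_ah_attr ah_attr;
 *	struct ib_ah *ah;
 *
 *	if (!ib_init_ah_from_path(device, port_num, resp, &ah_attr))
 *		ah = ib_create_ah(pd, &ah_attr);
 */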

static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
{
	unsigned long flags;

	memset(mad, 0, sizeof *mad);

	mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
	mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
	mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;

	spin_lock_irqsave(&tid_lock, flags);
	mad->mad_hdr.tid           =
		cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
	spin_unlock_irqrestore(&tid_lock, flags);
}

static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
{
	unsigned long flags;
	int ret, id;

retry:
	if (!idr_pre_get(&query_idr, gfp_mask))
		return -ENOMEM;
	spin_lock_irqsave(&idr_lock, flags);
	ret = idr_get_new(&query_idr, query, &id);
	spin_unlock_irqrestore(&idr_lock, flags);
	if (ret == -EAGAIN)
		goto retry;
	if (ret)
		return ret;

	query->mad_buf->timeout_ms  = timeout_ms;
	query->mad_buf->context[0] = query;
	query->id = id;

	spin_lock_irqsave(&query->port->ah_lock, flags);
	kref_get(&query->port->sm_ah->ref);
	query->sm_ah = query->port->sm_ah;
	spin_unlock_irqrestore(&query->port->ah_lock, flags);

	query->mad_buf->ah = query->sm_ah->ah;

	ret = ib_post_send_mad(query->mad_buf, NULL);
	if (ret) {
		spin_lock_irqsave(&idr_lock, flags);
		idr_remove(&query_idr, id);
		spin_unlock_irqrestore(&idr_lock, flags);

		kref_put(&query->sm_ah->ref, free_sm_ah);
	}

	/*
	 * It's not safe to dereference query any more, because the
	 * send may already have completed and freed the query in
	 * another context.
	 */
	return ret ? ret : id;
}

static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
				    int status,
				    struct ib_sa_mad *mad)
{
	struct ib_sa_path_query *query =
		container_of(sa_query, struct ib_sa_path_query, sa_query);

	if (mad) {
		struct ib_sa_path_rec rec;

		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
	kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}

/**
 * ib_sa_path_rec_get - Start a Path get query
 * @client:SA client
 * @device:device to send query on
 * @port_num: port number to send query on
 * @rec:Path Record to send in query
 * @comp_mask:component mask to send in query
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when query completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:query context, used to cancel query
 *
 * Send a Path Record Get query to the SA to look up a path.  The
 * callback function will be called when the query completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_path_rec_get() is negative, it is an
 * error code.  Otherwise it is a query ID that can be used to cancel
 * the query.
 */
int ib_sa_path_rec_get(struct ib_sa_client *client,
		       struct ib_device *device, u8 port_num,
		       struct ib_sa_path_rec *rec,
		       ib_sa_comp_mask comp_mask,
		       int timeout_ms, gfp_t gfp_mask,
		       void (*callback)(int status,
					struct ib_sa_path_rec *resp,
					void *context),
		       void *context,
		       struct ib_sa_query **sa_query)
{
	struct ib_sa_path_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port;
	struct ib_mad_agent *agent;
	struct ib_sa_mad *mad;
	int ret;

	if (!sa_dev)
		return -ENODEV;

	port  = &sa_dev->port[port_num - sa_dev->start_port];
	agent = port->agent;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;

	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
						     0, IB_MGMT_SA_HDR,
						     IB_MGMT_SA_DATA, gfp_mask);
	/* ib_create_send_mad() returns an ERR_PTR on failure */
	if (IS_ERR(query->sa_query.mad_buf)) {
		ret = PTR_ERR(query->sa_query.mad_buf);
		goto err1;
	}

	ib_sa_client_get(client);
	query->sa_query.client = client;
	query->callback        = callback;
	query->context         = context;

	mad = query->sa_query.mad_buf->mad;
	init_mad(mad, agent);

	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
	query->sa_query.release  = ib_sa_path_rec_release;
	query->sa_query.port     = port;
	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
	mad->sa_hdr.comp_mask	 = comp_mask;

	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
	if (ret < 0)
		goto err2;

	return ret;

err2:
	*sa_query = NULL;
	ib_sa_client_put(query->sa_query.client);
	ib_free_send_mad(query->sa_query.mad_buf);

err1:
	kfree(query);
	return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
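
/*
 * A hypothetical caller of ib_sa_path_rec_get(), looking up a path by
 * GID pair.  The component mask bits come from <rdma/ib_sa.h>; the
 * handler and client names are illustrative only:
 *
 *	static void my_path_handler(int status,
 *				    struct ib_sa_path_rec *resp,
 *				    void *context)
 *	{
 *		if (!status)
 *			... use *resp, e.g. via ib_init_ah_from_path() ...
 *	}
 *
 *	struct ib_sa_path_rec rec = {
 *		.sgid      = sgid,
 *		.dgid      = dgid,
 *		.numb_path = 1,
 *	};
 *	struct ib_sa_query *query;
 *	int id;
 *
 *	id = ib_sa_path_rec_get(&my_sa_client, device, port_num, &rec,
 *				IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_DGID |
 *				IB_SA_PATH_REC_NUMB_PATH,
 *				1000, GFP_KERNEL,
 *				my_path_handler, NULL, &query);
 */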

static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
				       int status,
				       struct ib_sa_mad *mad)
{
	struct ib_sa_service_query *query =
		container_of(sa_query, struct ib_sa_service_query, sa_query);

	if (mad) {
		struct ib_sa_service_rec rec;

		ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
{
	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
}

/**
 * ib_sa_service_rec_query - Start Service Record operation
 * @client:SA client
 * @device:device to send request on
 * @port_num: port number to send request on
 * @method:SA method - should be get, set, or delete
 * @rec:Service Record to send in request
 * @comp_mask:component mask to send in request
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when request completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:request context, used to cancel request
 *
 * Send a Service Record set/get/delete to the SA to register,
 * unregister or query a service record.
 * The callback function will be called when the request completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_service_rec_query() is negative, it is an
 * error code.  Otherwise it is a request ID that can be used to cancel
 * the query.
 */
int ib_sa_service_rec_query(struct ib_sa_client *client,
			    struct ib_device *device, u8 port_num, u8 method,
			    struct ib_sa_service_rec *rec,
			    ib_sa_comp_mask comp_mask,
			    int timeout_ms, gfp_t gfp_mask,
			    void (*callback)(int status,
					     struct ib_sa_service_rec *resp,
					     void *context),
			    void *context,
			    struct ib_sa_query **sa_query)
{
	struct ib_sa_service_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port;
	struct ib_mad_agent *agent;
	struct ib_sa_mad *mad;
	int ret;

	if (!sa_dev)
		return -ENODEV;

	port  = &sa_dev->port[port_num - sa_dev->start_port];
	agent = port->agent;

	if (method != IB_MGMT_METHOD_GET &&
	    method != IB_MGMT_METHOD_SET &&
	    method != IB_SA_METHOD_DELETE)
		return -EINVAL;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;

	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
						     0, IB_MGMT_SA_HDR,
						     IB_MGMT_SA_DATA, gfp_mask);
	/* ib_create_send_mad() returns an ERR_PTR on failure */
	if (IS_ERR(query->sa_query.mad_buf)) {
		ret = PTR_ERR(query->sa_query.mad_buf);
		goto err1;
	}

	ib_sa_client_get(client);
	query->sa_query.client = client;
	query->callback        = callback;
	query->context         = context;

	mad = query->sa_query.mad_buf->mad;
	init_mad(mad, agent);

	query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
	query->sa_query.release  = ib_sa_service_rec_release;
	query->sa_query.port     = port;
	mad->mad_hdr.method	 = method;
	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
	mad->sa_hdr.comp_mask	 = comp_mask;

	ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
		rec, mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
	if (ret < 0)
		goto err2;

	return ret;

err2:
	*sa_query = NULL;
	ib_sa_client_put(query->sa_query.client);
	ib_free_send_mad(query->sa_query.mad_buf);

err1:
	kfree(query);
	return ret;
}
EXPORT_SYMBOL(ib_sa_service_rec_query);
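
/*
 * A hedged sketch of registering a service with the SA via the call
 * above; the field values, comp_mask choice, and all names other than
 * the exported function and mask bits are illustrative only:
 *
 *	struct ib_sa_service_rec rec;
 *
 *	memset(&rec, 0, sizeof rec);
 *	rec.id  = MY_SERVICE_ID;
 *	rec.gid = port_gid;
 *
 *	id = ib_sa_service_rec_query(&my_sa_client, device, port_num,
 *				     IB_MGMT_METHOD_SET, &rec,
 *				     IB_SA_SERVICE_REC_SERVICE_ID |
 *				     IB_SA_SERVICE_REC_SERVICE_GID,
 *				     1000, GFP_KERNEL,
 *				     my_service_handler, NULL, &query);
 */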

static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
					int status,
					struct ib_sa_mad *mad)
{
	struct ib_sa_mcmember_query *query =
		container_of(sa_query, struct ib_sa_mcmember_query, sa_query);

	if (mad) {
		struct ib_sa_mcmember_rec rec;

		ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
{
	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
}

int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
			     struct ib_device *device, u8 port_num,
			     u8 method,
			     struct ib_sa_mcmember_rec *rec,
			     ib_sa_comp_mask comp_mask,
			     int timeout_ms, gfp_t gfp_mask,
			     void (*callback)(int status,
					      struct ib_sa_mcmember_rec *resp,
					      void *context),
			     void *context,
			     struct ib_sa_query **sa_query)
{
	struct ib_sa_mcmember_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port;
	struct ib_mad_agent *agent;
	struct ib_sa_mad *mad;
	int ret;

	if (!sa_dev)
		return -ENODEV;

	port  = &sa_dev->port[port_num - sa_dev->start_port];
	agent = port->agent;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;

	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
						     0, IB_MGMT_SA_HDR,
						     IB_MGMT_SA_DATA, gfp_mask);
	/* ib_create_send_mad() returns an ERR_PTR on failure */
	if (IS_ERR(query->sa_query.mad_buf)) {
		ret = PTR_ERR(query->sa_query.mad_buf);
		goto err1;
	}

	ib_sa_client_get(client);
	query->sa_query.client = client;
	query->callback        = callback;
	query->context         = context;

	mad = query->sa_query.mad_buf->mad;
	init_mad(mad, agent);

	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
	query->sa_query.release  = ib_sa_mcmember_rec_release;
	query->sa_query.port     = port;
	mad->mad_hdr.method	 = method;
	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
	mad->sa_hdr.comp_mask	 = comp_mask;

	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
		rec, mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
	if (ret < 0)
		goto err2;

	return ret;

err2:
	*sa_query = NULL;
	ib_sa_client_put(query->sa_query.client);
	ib_free_send_mad(query->sa_query.mad_buf);

err1:
	kfree(query);
	return ret;
}
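
/*
 * Multicast joins normally go through the multicast module layered on
 * top of this call (see mcast_init() below) rather than using it
 * directly.  A direct-use sketch; all names other than the function
 * and the mask bits from <rdma/ib_sa.h> are illustrative:
 *
 *	struct ib_sa_mcmember_rec rec;
 *
 *	memset(&rec, 0, sizeof rec);
 *	rec.mgid       = mgid;
 *	rec.port_gid   = port_gid;
 *	rec.join_state = 1;	(full member)
 *
 *	id = ib_sa_mcmember_rec_query(&my_sa_client, device, port_num,
 *				      IB_MGMT_METHOD_SET, &rec,
 *				      IB_SA_MCMEMBER_REC_MGID |
 *				      IB_SA_MCMEMBER_REC_PORT_GID |
 *				      IB_SA_MCMEMBER_REC_JOIN_STATE,
 *				      1000, GFP_KERNEL,
 *				      my_mcmember_handler, NULL, &query);
 */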

static void send_handler(struct ib_mad_agent *agent,
			 struct ib_mad_send_wc *mad_send_wc)
{
	struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
	unsigned long flags;

	if (query->callback)
		switch (mad_send_wc->status) {
		case IB_WC_SUCCESS:
			/* No callback -- already got recv */
			break;
		case IB_WC_RESP_TIMEOUT_ERR:
			query->callback(query, -ETIMEDOUT, NULL);
			break;
		case IB_WC_WR_FLUSH_ERR:
			query->callback(query, -EINTR, NULL);
			break;
		default:
			query->callback(query, -EIO, NULL);
			break;
		}

	spin_lock_irqsave(&idr_lock, flags);
	idr_remove(&query_idr, query->id);
	spin_unlock_irqrestore(&idr_lock, flags);

	ib_free_send_mad(mad_send_wc->send_buf);
	kref_put(&query->sm_ah->ref, free_sm_ah);
	ib_sa_client_put(query->client);
	query->release(query);
}

static void recv_handler(struct ib_mad_agent *mad_agent,
			 struct ib_mad_recv_wc *mad_recv_wc)
{
	struct ib_sa_query *query;
	struct ib_mad_send_buf *mad_buf;

	mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
	query = mad_buf->context[0];

	if (query->callback) {
		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
			query->callback(query,
					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
					-EINVAL : 0,
					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
		else
			query->callback(query, -EIO, NULL);
	}

	ib_free_recv_mad(mad_recv_wc);
}

static void ib_sa_add_one(struct ib_device *device)
{
	struct ib_sa_device *sa_dev;
	int s, e, i;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	if (device->node_type == RDMA_NODE_IB_SWITCH)
		s = e = 0;
	else {
		s = 1;
		e = device->phys_port_cnt;
	}

	sa_dev = kmalloc(sizeof *sa_dev +
			 (e - s + 1) * sizeof (struct ib_sa_port),
			 GFP_KERNEL);
	if (!sa_dev)
		return;

	sa_dev->start_port = s;
	sa_dev->end_port   = e;

	for (i = 0; i <= e - s; ++i) {
		sa_dev->port[i].sm_ah    = NULL;
		sa_dev->port[i].port_num = i + s;
		spin_lock_init(&sa_dev->port[i].ah_lock);

		sa_dev->port[i].agent =
			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
					      NULL, 0, send_handler,
					      recv_handler, sa_dev);
		if (IS_ERR(sa_dev->port[i].agent))
			goto err;

		INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
	}

	ib_set_client_data(device, &sa_client, sa_dev);

	/*
	 * We register our event handler after everything is set up,
	 * and then update our cached info after the event handler is
	 * registered to avoid any problems if a port changes state
	 * during our initialization.
	 */

	INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
	if (ib_register_event_handler(&sa_dev->event_handler))
		goto err;

	for (i = 0; i <= e - s; ++i)
		update_sm_ah(&sa_dev->port[i].update_task);

	return;

err:
	while (--i >= 0)
		ib_unregister_mad_agent(sa_dev->port[i].agent);

	kfree(sa_dev);

	return;
}

static void ib_sa_remove_one(struct ib_device *device)
{
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	int i;

	if (!sa_dev)
		return;

	ib_unregister_event_handler(&sa_dev->event_handler);

	flush_scheduled_work();

	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
		ib_unregister_mad_agent(sa_dev->port[i].agent);
		/* sm_ah stays NULL if update_sm_ah() never succeeded */
		if (sa_dev->port[i].sm_ah)
			kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
	}

	kfree(sa_dev);
}

static int __init ib_sa_init(void)
{
	int ret;

	spin_lock_init(&idr_lock);
	spin_lock_init(&tid_lock);

	get_random_bytes(&tid, sizeof tid);

	ret = ib_register_client(&sa_client);
	if (ret) {
		printk(KERN_ERR "Couldn't register ib_sa client\n");
		goto err1;
	}

	ret = mcast_init();
	if (ret) {
		printk(KERN_ERR "Couldn't initialize multicast handling\n");
		goto err2;
	}

	return 0;
err2:
	ib_unregister_client(&sa_client);
err1:
	return ret;
}

static void __exit ib_sa_cleanup(void)
{
	mcast_cleanup();
	ib_unregister_client(&sa_client);
	idr_destroy(&query_idr);
}

module_init(ib_sa_init);
module_exit(ib_sa_cleanup);