/*
 * Copyright (c) 2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/bitops.h>
#include <linux/random.h>

#include "sa.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand InformInfo & Notice event handling");
MODULE_LICENSE("Dual BSD/GPL");

static void inform_add_one(struct ib_device *device);
static void inform_remove_one(struct ib_device *device);

static struct ib_client inform_client = {
	.name   = "ib_notice",
	.add    = inform_add_one,
	.remove = inform_remove_one
};

static struct ib_sa_client	sa_client;
static struct workqueue_struct	*inform_wq;

struct inform_device;

struct inform_port {
	struct inform_device	*dev;
	spinlock_t		lock;
	struct rb_root		table;
	atomic_t		refcount;
	struct completion	comp;
	u8			port_num;
};

struct inform_device {
	struct ib_device	*device;
	struct ib_event_handler	event_handler;
	int			start_port;
	int			end_port;
	struct inform_port	port[0];
};

enum inform_state {
	INFORM_IDLE,
	INFORM_REGISTERING,
	INFORM_MEMBER,
	INFORM_BUSY,
	INFORM_ERROR
};

struct inform_member;

struct inform_group {
	u16			trap_number;
	struct rb_node		node;
	struct inform_port	*port;
	spinlock_t		lock;
	struct work_struct	work;
	struct list_head	pending_list;
	struct list_head	active_list;
	struct list_head	notice_list;
	struct inform_member	*last_join;
	int			members;
	enum inform_state	join_state; /* State relative to SA */
	atomic_t		refcount;
	enum inform_state	state;
	struct ib_sa_query	*query;
	int			query_id;
};

struct inform_member {
	struct ib_inform_info	info;
	struct ib_sa_client	*client;
	struct inform_group	*group;
	struct list_head	list;
	enum inform_state	state;
	atomic_t		refcount;
	struct completion	comp;
};

struct inform_notice {
	struct list_head	list;
	struct ib_sa_notice	notice;
};

static void reg_handler(int status, struct ib_sa_inform *inform,
			 void *context);
static void unreg_handler(int status, struct ib_sa_inform *inform,
			  void *context);

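/*
 * Subscription groups are kept in a per-port rb-tree keyed by trap number.
 * inform_find() and inform_insert() walk or modify that tree and must be
 * called with the port lock held.
 */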
static struct inform_group *inform_find(struct inform_port *port,
					u16 trap_number)
{
	struct rb_node *node = port->table.rb_node;
	struct inform_group *group;

	while (node) {
		group = rb_entry(node, struct inform_group, node);
		if (trap_number < group->trap_number)
			node = node->rb_left;
		else if (trap_number > group->trap_number)
			node = node->rb_right;
		else
			return group;
	}
	return NULL;
}

static struct inform_group *inform_insert(struct inform_port *port,
					  struct inform_group *group)
{
	struct rb_node **link = &port->table.rb_node;
	struct rb_node *parent = NULL;
	struct inform_group *cur_group;

	while (*link) {
		parent = *link;
		cur_group = rb_entry(parent, struct inform_group, node);
		if (group->trap_number < cur_group->trap_number)
			link = &(*link)->rb_left;
		else if (group->trap_number > cur_group->trap_number)
			link = &(*link)->rb_right;
		else
			return cur_group;
	}
	rb_link_node(&group->node, parent, link);
	rb_insert_color(&group->node, &port->table);
	return NULL;
}

static void deref_port(struct inform_port *port)
{
	if (atomic_dec_and_test(&port->refcount))
		complete(&port->comp);
}

static void release_group(struct inform_group *group)
{
	struct inform_port *port = group->port;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	if (atomic_dec_and_test(&group->refcount)) {
		rb_erase(&group->node, &port->table);
		spin_unlock_irqrestore(&port->lock, flags);
		kfree(group);
		deref_port(port);
	} else
		spin_unlock_irqrestore(&port->lock, flags);
}

static void deref_member(struct inform_member *member)
{
	if (atomic_dec_and_test(&member->refcount))
		complete(&member->comp);
}

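/*
 * Add a member to the group's pending list and kick off the group work
 * item if the group is currently idle.  All registration processing is
 * done from the work handler.
 */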
static void queue_reg(struct inform_member *member)
{
	struct inform_group *group = member->group;
	unsigned long flags;

	spin_lock_irqsave(&group->lock, flags);
	list_add(&member->list, &group->pending_list);
	if (group->state == INFORM_IDLE) {
		group->state = INFORM_BUSY;
		atomic_inc(&group->refcount);
		queue_work(inform_wq, &group->work);
	}
	spin_unlock_irqrestore(&group->lock, flags);
}

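/*
 * Issue an InformInfo subscribe (send_reg) or unsubscribe (send_unreg)
 * request to the SA for the group's trap number.  On success the query id
 * is recorded and completion is reported through reg_handler() or
 * unreg_handler().
 */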
static int send_reg(struct inform_group *group, struct inform_member *member)
{
	struct inform_port *port = group->port;
	struct ib_sa_inform inform;
	int ret;

	memset(&inform, 0, sizeof inform);
	inform.lid_range_begin = cpu_to_be16(0xFFFF);
	inform.is_generic = 1;
	inform.subscribe = 1;
	inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
	inform.trap.generic.trap_num = cpu_to_be16(member->info.trap_number);
	inform.trap.generic.resp_time = 19;
	inform.trap.generic.producer_type =
				cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);

	group->last_join = member;
	ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
				     port->port_num, &inform, 3000, GFP_KERNEL,
				     reg_handler, group, &group->query);
	if (ret >= 0) {
		group->query_id = ret;
		ret = 0;
	}
	return ret;
}

static int send_unreg(struct inform_group *group)
{
	struct inform_port *port = group->port;
	struct ib_sa_inform inform;
	int ret;

	memset(&inform, 0, sizeof inform);
	inform.lid_range_begin = cpu_to_be16(0xFFFF);
	inform.is_generic = 1;
	inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
	inform.trap.generic.trap_num = cpu_to_be16(group->trap_number);
	inform.trap.generic.qpn = IB_QP1;
	inform.trap.generic.resp_time = 19;
	inform.trap.generic.producer_type =
				cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);

	ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
				     port->port_num, &inform, 3000, GFP_KERNEL,
				     unreg_handler, group, &group->query);
	if (ret >= 0) {
		group->query_id = ret;
		ret = 0;
	}
	return ret;
}

static void join_group(struct inform_group *group, struct inform_member *member)
{
	member->state = INFORM_MEMBER;
	group->members++;
	list_move(&member->list, &group->active_list);
}

static int fail_join(struct inform_group *group, struct inform_member *member,
		     int status)
{
	spin_lock_irq(&group->lock);
	list_del_init(&member->list);
	spin_unlock_irq(&group->lock);
	return member->info.callback(status, &member->info, NULL);
}

static void process_group_error(struct inform_group *group)
{
	struct inform_member *member;
	int ret;

	spin_lock_irq(&group->lock);
	while (!list_empty(&group->active_list)) {
		member = list_entry(group->active_list.next,
				    struct inform_member, list);
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		group->members--;
		member->state = INFORM_ERROR;
		spin_unlock_irq(&group->lock);

		ret = member->info.callback(-ENETRESET, &member->info, NULL);
		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
		spin_lock_irq(&group->lock);
	}

	group->join_state = INFORM_IDLE;
	group->state = INFORM_BUSY;
	spin_unlock_irq(&group->lock);
}

/*
 * Report a notice to all active subscribers.  We use a temporary list to
 * handle unsubscription requests while the notice is being reported, which
 * avoids holding the group lock while in the user's callback.
 */
static void process_notice(struct inform_group *group,
			   struct inform_notice *info_notice)
{
	struct inform_member *member;
	struct list_head list;
	int ret;

	INIT_LIST_HEAD(&list);

	spin_lock_irq(&group->lock);
	list_splice_init(&group->active_list, &list);
	while (!list_empty(&list)) {
		member = list_entry(list.next, struct inform_member, list);
		atomic_inc(&member->refcount);
		list_move(&member->list, &group->active_list);
		spin_unlock_irq(&group->lock);

		ret = member->info.callback(0, &member->info,
					    &info_notice->notice);
		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
		spin_lock_irq(&group->lock);
	}
	spin_unlock_irq(&group->lock);
}

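/*
 * Group state machine.  Drains the queued notices and pending registration
 * requests for a group, handles error recovery, and unsubscribes from the
 * SA once the last member has left.  Runs from the workqueue or directly
 * from an SA query completion.
 */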
static void inform_work_handler(struct work_struct *work)
{
	struct inform_group *group;
	struct inform_member *member;
	struct ib_inform_info *info;
	struct inform_notice *info_notice;
	int status, ret;

	group = container_of(work, typeof(*group), work);
retest:
	spin_lock_irq(&group->lock);
	while (!list_empty(&group->pending_list) ||
	       !list_empty(&group->notice_list) ||
	       (group->state == INFORM_ERROR)) {

		if (group->state == INFORM_ERROR) {
			spin_unlock_irq(&group->lock);
			process_group_error(group);
			goto retest;
		}

		if (!list_empty(&group->notice_list)) {
			info_notice = list_entry(group->notice_list.next,
						 struct inform_notice, list);
			list_del(&info_notice->list);
			spin_unlock_irq(&group->lock);
			process_notice(group, info_notice);
			kfree(info_notice);
			goto retest;
		}

		member = list_entry(group->pending_list.next,
				    struct inform_member, list);
		info = &member->info;
		atomic_inc(&member->refcount);

		if (group->join_state == INFORM_MEMBER) {
			join_group(group, member);
			spin_unlock_irq(&group->lock);
			ret = info->callback(0, info, NULL);
		} else {
			spin_unlock_irq(&group->lock);
			status = send_reg(group, member);
			if (!status) {
				deref_member(member);
				return;
			}
			ret = fail_join(group, member, status);
		}

		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
		spin_lock_irq(&group->lock);
	}

	if (!group->members && (group->join_state == INFORM_MEMBER)) {
		group->join_state = INFORM_IDLE;
		spin_unlock_irq(&group->lock);
		if (send_unreg(group))
			goto retest;
	} else {
		group->state = INFORM_IDLE;
		spin_unlock_irq(&group->lock);
		release_group(group);
	}
}

/*
 * Fail a join request if it is still active, i.e. still at the head of the
 * pending queue.
 */
static void process_join_error(struct inform_group *group, int status)
{
	struct inform_member *member;
	int ret;

	spin_lock_irq(&group->lock);
	member = list_entry(group->pending_list.next,
			    struct inform_member, list);
	if (group->last_join == member) {
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		spin_unlock_irq(&group->lock);
		ret = member->info.callback(status, &member->info, NULL);
		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
	} else
		spin_unlock_irq(&group->lock);
}

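/*
 * Completion handlers for the InformInfo registration and unregistration
 * queries.  Both resume group processing by running the work handler
 * directly in the callback context.
 */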
static void reg_handler(int status, struct ib_sa_inform *inform, void *context)
{
	struct inform_group *group = context;

	if (status)
		process_join_error(group, status);
	else
		group->join_state = INFORM_MEMBER;

	inform_work_handler(&group->work);
}

static void unreg_handler(int status, struct ib_sa_inform *rec, void *context)
{
	struct inform_group *group = context;

	inform_work_handler(&group->work);
}

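/*
 * Dispatch a received Notice to the subscription group registered for its
 * trap number.  The notice is copied and queued to the group; delivery to
 * subscribers happens from the group work handler.
 */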
int notice_dispatch(struct ib_device *device, u8 port_num,
		    struct ib_sa_notice *notice)
{
	struct inform_device *dev;
	struct inform_port *port;
	struct inform_group *group;
	struct inform_notice *info_notice;

	dev = ib_get_client_data(device, &inform_client);
	if (!dev)
		return 0; /* No one to give notice to. */

	port = &dev->port[port_num - dev->start_port];
	spin_lock_irq(&port->lock);
	group = inform_find(port,
			    __be16_to_cpu(notice->trap.generic.trap_num));
	if (!group) {
		spin_unlock_irq(&port->lock);
		return 0;
	}

	atomic_inc(&group->refcount);
	spin_unlock_irq(&port->lock);

	info_notice = kmalloc(sizeof *info_notice, GFP_KERNEL);
	if (!info_notice) {
		release_group(group);
		return -ENOMEM;
	}

	info_notice->notice = *notice;

	spin_lock_irq(&group->lock);
	list_add(&info_notice->list, &group->notice_list);
	if (group->state == INFORM_IDLE) {
		group->state = INFORM_BUSY;
		spin_unlock_irq(&group->lock);
		inform_work_handler(&group->work);
	} else {
		spin_unlock_irq(&group->lock);
		release_group(group);
	}

	return 0;
}

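/*
 * Look up the group for a trap number, creating and inserting a new one if
 * none exists yet.  A reference is taken on the returned group.
 */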
static struct inform_group *acquire_group(struct inform_port *port,
					  u16 trap_number, gfp_t gfp_mask)
{
	struct inform_group *group, *cur_group;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	group = inform_find(port, trap_number);
	if (group)
		goto found;
	spin_unlock_irqrestore(&port->lock, flags);

	group = kzalloc(sizeof *group, gfp_mask);
	if (!group)
		return NULL;

	group->port = port;
	group->trap_number = trap_number;
	INIT_LIST_HEAD(&group->pending_list);
	INIT_LIST_HEAD(&group->active_list);
	INIT_LIST_HEAD(&group->notice_list);
	INIT_WORK(&group->work, inform_work_handler);
	spin_lock_init(&group->lock);

	spin_lock_irqsave(&port->lock, flags);
	cur_group = inform_insert(port, group);
	if (cur_group) {
		kfree(group);
		group = cur_group;
	} else
		atomic_inc(&port->refcount);
found:
	atomic_inc(&group->refcount);
	spin_unlock_irqrestore(&port->lock, flags);
	return group;
}

/*
 * We serialize all join requests to a single group to make our lives much
 * easier.  Otherwise, two users could try to join the same group
 * simultaneously, with different configurations, one could leave while the
 * join is in progress, etc., which makes locking around error recovery
 * difficult.
 */
struct ib_inform_info *
ib_sa_register_inform_info(struct ib_sa_client *client,
			   struct ib_device *device, u8 port_num,
			   u16 trap_number, gfp_t gfp_mask,
			   int (*callback)(int status,
					   struct ib_inform_info *info,
					   struct ib_sa_notice *notice),
			   void *context)
{
	struct inform_device *dev;
	struct inform_member *member;
	struct ib_inform_info *info;
	int ret;

	dev = ib_get_client_data(device, &inform_client);
	if (!dev)
		return ERR_PTR(-ENODEV);

	member = kzalloc(sizeof *member, gfp_mask);
	if (!member)
		return ERR_PTR(-ENOMEM);

	ib_sa_client_get(client);
	member->client = client;
	member->info.trap_number = trap_number;
	member->info.callback = callback;
	member->info.context = context;
	init_completion(&member->comp);
	atomic_set(&member->refcount, 1);
	member->state = INFORM_REGISTERING;

	member->group = acquire_group(&dev->port[port_num - dev->start_port],
				      trap_number, gfp_mask);
	if (!member->group) {
		ret = -ENOMEM;
		goto err;
	}

	/*
	 * The user will get the info structure in their callback.  They
	 * could then free the info structure before we can return from
	 * this routine.  So we save the pointer to return before queuing
	 * any callback.
	 */
	info = &member->info;
	queue_reg(member);
	return info;

err:
	ib_sa_client_put(member->client);
	kfree(member);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_sa_register_inform_info);

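/*
 * Remove a subscriber from its group and wait for any callbacks in
 * progress to finish before freeing the member.  If this was the last
 * member, the group work handler will unsubscribe from the SA.
 */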
void ib_sa_unregister_inform_info(struct ib_inform_info *info)
{
	struct inform_member *member;
	struct inform_group *group;

	member = container_of(info, struct inform_member, info);
	group = member->group;

	spin_lock_irq(&group->lock);
	if (member->state == INFORM_MEMBER)
		group->members--;

	list_del_init(&member->list);

	if (group->state == INFORM_IDLE) {
		group->state = INFORM_BUSY;
		spin_unlock_irq(&group->lock);
		/* Continue to hold reference on group until callback */
		queue_work(inform_wq, &group->work);
	} else {
		spin_unlock_irq(&group->lock);
		release_group(group);
	}

	deref_member(member);
	wait_for_completion(&member->comp);
	ib_sa_client_put(member->client);
	kfree(member);
}
EXPORT_SYMBOL(ib_sa_unregister_inform_info);

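/*
 * The port events handled below invalidate our registrations with the SA.
 * Mark every group on the port as being in error; the work handler will
 * report -ENETRESET to all active subscribers.
 */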
static void inform_groups_lost(struct inform_port *port)
{
	struct inform_group *group;
	struct rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	for (node = rb_first(&port->table); node; node = rb_next(node)) {
		group = rb_entry(node, struct inform_group, node);
		spin_lock(&group->lock);
		if (group->state == INFORM_IDLE) {
			atomic_inc(&group->refcount);
			queue_work(inform_wq, &group->work);
		}
		group->state = INFORM_ERROR;
		spin_unlock(&group->lock);
	}
	spin_unlock_irqrestore(&port->lock, flags);
}

static void inform_event_handler(struct ib_event_handler *handler,
				struct ib_event *event)
{
	struct inform_device *dev;

	dev = container_of(handler, struct inform_device, event_handler);

	switch (event->event) {
	case IB_EVENT_PORT_ERR:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
		inform_groups_lost(&dev->port[event->element.port_num -
					      dev->start_port]);
		break;
	default:
		break;
	}
}

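/*
 * Device add/remove callbacks for the ib_notice client.  One inform_port is
 * set up per port; for a switch only the management port (port 0) is
 * tracked.
 */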
static void inform_add_one(struct ib_device *device)
{
	struct inform_device *dev;
	struct inform_port *port;
	int i;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
		      GFP_KERNEL);
	if (!dev)
		return;

	if (device->node_type == RDMA_NODE_IB_SWITCH)
		dev->start_port = dev->end_port = 0;
	else {
		dev->start_port = 1;
		dev->end_port = device->phys_port_cnt;
	}

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		port = &dev->port[i];
		port->dev = dev;
		port->port_num = dev->start_port + i;
		spin_lock_init(&port->lock);
		port->table = RB_ROOT;
		init_completion(&port->comp);
		atomic_set(&port->refcount, 1);
	}

	dev->device = device;
	ib_set_client_data(device, &inform_client, dev);

	INIT_IB_EVENT_HANDLER(&dev->event_handler, device, inform_event_handler);
	ib_register_event_handler(&dev->event_handler);
}

static void inform_remove_one(struct ib_device *device)
{
	struct inform_device *dev;
	struct inform_port *port;
	int i;

	dev = ib_get_client_data(device, &inform_client);
	if (!dev)
		return;

	ib_unregister_event_handler(&dev->event_handler);
	flush_workqueue(inform_wq);

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		port = &dev->port[i];
		deref_port(port);
		wait_for_completion(&port->comp);
	}

	kfree(dev);
}

int notice_init(void)
{
	int ret;

	inform_wq = create_singlethread_workqueue("ib_inform");
	if (!inform_wq)
		return -ENOMEM;

	ib_sa_register_client(&sa_client);

	ret = ib_register_client(&inform_client);
	if (ret)
		goto err;
	return 0;

err:
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(inform_wq);
	return ret;
}

void notice_cleanup(void)
{
	ib_unregister_client(&inform_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(inform_wq);
}