/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  Functions for EQs, NEQs and interrupts
 *
 *  Authors: Heiko J Schick <schickhj@de.ibm.com>
 *           Khadija Souissi <souissi@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "ehca_classes.h"
#include "ehca_irq.h"
#include "ehca_iverbs.h"
#include "ehca_tools.h"
#include "hcp_if.h"
#include "hipz_fns.h"
#include "ipz_pt_fn.h"

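/*
 * Bit field definitions for decoding (N)EQ entries; EHCA_BMASK_IBM() uses
 * IBM bit numbering, i.e. bit 0 is the most significant bit of the 64 bit
 * entry.
 */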
#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1, 1)
#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8, 31)
#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM(2, 7)
#define EQE_CQ_NUMBER          EHCA_BMASK_IBM(8, 31)
#define EQE_QP_NUMBER          EHCA_BMASK_IBM(8, 31)
#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)

#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM(1, 1)
#define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2, 7)
#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8, 15)
#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)

#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
#define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0, 7)

static void queue_comp_task(struct ehca_cq *__cq);

static struct ehca_comp_pool *pool;
#ifdef CONFIG_HOTPLUG_CPU
static struct notifier_block comp_pool_callback_nb;
#endif

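/* Call the consumer's completion handler for this CQ, serialized by cb_lock. */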
static inline void comp_event_callback(struct ehca_cq *cq)
{
	if (!cq->ib_cq.comp_handler)
		return;

	spin_lock(&cq->cb_lock);
	cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
	spin_unlock(&cq->cb_lock);

	return;
}

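/*
 * Print the error data block returned by firmware for a QP or CQ resource;
 * the resource type is taken from the rblock itself.
 */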
static void print_error_data(struct ehca_shca *shca, void *data,
			     u64 *rblock, int length)
{
	u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
	u64 resource = rblock[1];

	switch (type) {
	case 0x1: /* Queue Pair */
	{
		struct ehca_qp *qp = (struct ehca_qp *)data;

		/* only print error data if AER is set */
		if (rblock[6] == 0)
			return;

		ehca_err(&shca->ib_device,
			 "QP 0x%x (resource=%lx) has errors.",
			 qp->ib_qp.qp_num, resource);
		break;
	}
	case 0x4: /* Completion Queue */
	{
		struct ehca_cq *cq = (struct ehca_cq *)data;

		ehca_err(&shca->ib_device,
			 "CQ 0x%x (resource=%lx) has errors.",
			 cq->cq_number, resource);
		break;
	}
	default:
		ehca_err(&shca->ib_device,
			 "Unknown error type: %lx on %s.",
			 type, shca->ib_device.name);
		break;
	}

	ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
	ehca_err(&shca->ib_device, "EHCA ----- error data begin "
		 "---------------------------------------------------");
	ehca_dmp(rblock, length, "resource=%lx", resource);
	ehca_err(&shca->ib_device, "EHCA ----- error data end "
		 "----------------------------------------------------");

	return;
}

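/*
 * Fetch the error data for the given resource handle from firmware into a
 * 4K control block and dump it via print_error_data().
 */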
int ehca_error_data(struct ehca_shca *shca, void *data,
		    u64 resource)
{
	unsigned long ret;
	u64 *rblock;
	unsigned long block_count;

	rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
	if (!rblock) {
		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
		ret = -ENOMEM;
		goto error_data1;
	}

	/* rblock must be 4K aligned and should be 4K large */
	ret = hipz_h_error_data(shca->ipz_hca_handle,
				resource,
				rblock,
				&block_count);

	if (ret == H_R_STATE)
		ehca_err(&shca->ib_device,
			 "No error data is available: %lx.", resource);
	else if (ret == H_SUCCESS) {
		int length;

		length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);

		if (length > EHCA_PAGESIZE)
			length = EHCA_PAGESIZE;

		print_error_data(shca, data, rblock, length);
	} else
		ehca_err(&shca->ib_device,
			 "Error data could not be fetched: %lx", resource);

	ehca_free_fw_ctrlblock(rblock);

error_data1:
	return ret;
}

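/*
 * Look up the QP belonging to this EQE by its token, dump its error data
 * and forward the event to the consumer's QP event handler, if any.
 */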
static void qp_event_callback(struct ehca_shca *shca,
			      u64 eqe,
			      enum ib_event_type event_type)
{
	struct ib_event event;
	struct ehca_qp *qp;
	unsigned long flags;
	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);

	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
	qp = idr_find(&ehca_qp_idr, token);
	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);

	if (!qp)
		return;

	ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);

	if (!qp->ib_qp.event_handler)
		return;

	event.device     = &shca->ib_device;
	event.event      = event_type;
	event.element.qp = &qp->ib_qp;

	qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);

	return;
}

static void cq_event_callback(struct ehca_shca *shca,
			      u64 eqe)
{
	struct ehca_cq *cq;
	unsigned long flags;
	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);

	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
	cq = idr_find(&ehca_cq_idr, token);
	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);

	if (!cq)
		return;

	ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);

	return;
}

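/* Decode the event identifier of an EQE and dispatch to the matching handler. */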
static void parse_identifier(struct ehca_shca *shca, u64 eqe)
{
	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);

	switch (identifier) {
	case 0x02: /* path migrated */
		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
		break;
	case 0x03: /* communication established */
		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
		break;
	case 0x04: /* send queue drained */
		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
		break;
	case 0x05: /* QP error */
	case 0x06: /* QP error */
		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
		break;
	case 0x07: /* CQ error */
	case 0x08: /* CQ error */
		cq_event_callback(shca, eqe);
		break;
	case 0x09: /* MRMWPTE error */
		ehca_err(&shca->ib_device, "MRMWPTE error.");
		break;
	case 0x0A: /* port event */
		ehca_err(&shca->ib_device, "Port event.");
		break;
	case 0x0B: /* MR access error */
		ehca_err(&shca->ib_device, "MR access error.");
		break;
	case 0x0C: /* EQ error */
		ehca_err(&shca->ib_device, "EQ error.");
		break;
	case 0x0D: /* P/Q_Key mismatch */
		ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
		break;
	case 0x10: /* sampling complete */
		ehca_err(&shca->ib_device, "Sampling complete.");
		break;
	case 0x11: /* unaffiliated access error */
		ehca_err(&shca->ib_device, "Unaffiliated access error.");
		break;
	case 0x12: /* path migrating error */
		ehca_err(&shca->ib_device, "Path migration error.");
		break;
	case 0x13: /* interface trace stopped */
		ehca_err(&shca->ib_device, "Interface trace stopped.");
		break;
	case 0x14: /* first error capture info available */
	default:
		ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
			 identifier, shca->ib_device.name);
		break;
	}

	return;
}

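/* Decode the event code of a notification EQE (port state changes etc.). */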
static void parse_ec(struct ehca_shca *shca, u64 eqe)
{
	struct ib_event event;
	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);

	switch (ec) {
	case 0x30: /* port availability change */
		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
			ehca_info(&shca->ib_device,
				  "port %x is active.", port);
			event.device = &shca->ib_device;
			event.event = IB_EVENT_PORT_ACTIVE;
			event.element.port_num = port;
			shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
			ib_dispatch_event(&event);
		} else {
			ehca_info(&shca->ib_device,
				  "port %x is inactive.", port);
			event.device = &shca->ib_device;
			event.event = IB_EVENT_PORT_ERR;
			event.element.port_num = port;
			shca->sport[port - 1].port_state = IB_PORT_DOWN;
			ib_dispatch_event(&event);
		}
		break;
	case 0x31:
		/* port configuration change
		 * disruptive change is caused by
		 * LID, PKEY or SM change
		 */
		ehca_warn(&shca->ib_device,
			  "disruptive port %x configuration change", port);

		ehca_info(&shca->ib_device,
			  "port %x is inactive.", port);
		event.device = &shca->ib_device;
		event.event = IB_EVENT_PORT_ERR;
		event.element.port_num = port;
		shca->sport[port - 1].port_state = IB_PORT_DOWN;
		ib_dispatch_event(&event);

		ehca_info(&shca->ib_device,
			  "port %x is active.", port);
		event.device = &shca->ib_device;
		event.event = IB_EVENT_PORT_ACTIVE;
		event.element.port_num = port;
		shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
		ib_dispatch_event(&event);
		break;
	case 0x32: /* adapter malfunction */
		ehca_err(&shca->ib_device, "Adapter malfunction.");
		break;
	case 0x33:  /* trace stopped */
		ehca_err(&shca->ib_device, "Trace stopped.");
		break;
	default:
		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
			 ec, shca->ib_device.name);
		break;
	}

	return;
}

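/*
 * Clear the CQ's "EQ pending" bit so further completions raise new events;
 * the read back is presumably there to make sure the store has reached the
 * adapter before we continue.
 */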
static inline void reset_eq_pending(struct ehca_cq *cq)
{
	u64 CQx_EP;
	struct h_galpa gal = cq->galpas.kernel;

	hipz_galpa_store_cq(gal, cqx_ep, 0x0);
	CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));

	return;
}

irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
{
	struct ehca_shca *shca = (struct ehca_shca *)dev_id;

	tasklet_hi_schedule(&shca->neq.interrupt_task);

	return IRQ_HANDLED;
}

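/* Drain the notification EQ and clear the pending notification events in firmware. */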
void ehca_tasklet_neq(unsigned long data)
{
	struct ehca_shca *shca = (struct ehca_shca *)data;
	struct ehca_eqe *eqe;
	u64 ret;

	eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);

	while (eqe) {
		if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
			parse_ec(shca, eqe->entry);

		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
	}

	ret = hipz_h_reset_event(shca->ipz_hca_handle,
				 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);

	if (ret != H_SUCCESS)
		ehca_err(&shca->ib_device, "Can't clear notification events.");

	return;
}

irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
{
	struct ehca_shca *shca = (struct ehca_shca *)dev_id;

	tasklet_hi_schedule(&shca->eq.interrupt_task);

	return IRQ_HANDLED;
}

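/*
 * Handle a single EQE: for completion events look up the CQ by token, clear
 * its pending bit and deliver the completion either directly or via the
 * per-CPU completion task pool; everything else goes to parse_identifier().
 */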
static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
{
	u64 eqe_value;
	u32 token;
	unsigned long flags;
	struct ehca_cq *cq;

	eqe_value = eqe->entry;
	ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
	if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
		ehca_dbg(&shca->ib_device, "Got completion event");
		token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
		cq = idr_find(&ehca_cq_idr, token);
		if (cq == NULL) {
			spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
			ehca_err(&shca->ib_device,
				 "Invalid eqe for non-existing cq token=%x",
				 token);
			return;
		}
		reset_eq_pending(cq);
		cq->nr_events++;
		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
		if (ehca_scaling_code)
			queue_comp_task(cq);
		else {
			comp_event_callback(cq);
			spin_lock_irqsave(&ehca_cq_idr_lock, flags);
			cq->nr_events--;
			if (!cq->nr_events)
				wake_up(&cq->wait_completion);
			spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
		}
	} else {
		ehca_dbg(&shca->ib_device, "Got non completion event");
		parse_identifier(shca, eqe_value);
	}
}

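/*
 * Main EQ processing, called from the EQ tasklet (is_irq != 0) and from the
 * periodic EQ poll ("deadman") path (is_irq == 0): wait for the interrupt
 * state to settle, cache up to EHCA_EQE_CACHE_SIZE EQEs while taking a CQ
 * reference (nr_events) for each completion event, clear the pending bits
 * and finally deliver the cached events.  EQEs that arrived in the meantime
 * are handled one by one via process_eqe().
 */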
void ehca_process_eq(struct ehca_shca *shca, int is_irq)
{
	struct ehca_eq *eq = &shca->eq;
	struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
	u64 eqe_value;
	unsigned long flags;
	int eqe_cnt, i;
	int eq_empty = 0;

	spin_lock_irqsave(&eq->irq_spinlock, flags);
	if (is_irq) {
		const int max_query_cnt = 100;
		int query_cnt = 0;
		int int_state = 1;
		do {
			int_state = hipz_h_query_int_state(
				shca->ipz_hca_handle, eq->ist);
			query_cnt++;
			iosync();
		} while (int_state && query_cnt < max_query_cnt);
		if (unlikely(query_cnt == max_query_cnt))
			ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
				 int_state, query_cnt);
	}

	/* read out all eqes */
	eqe_cnt = 0;
	do {
		u32 token;
		eqe_cache[eqe_cnt].eqe =
			(struct ehca_eqe *)ehca_poll_eq(shca, eq);
		if (!eqe_cache[eqe_cnt].eqe)
			break;
		eqe_value = eqe_cache[eqe_cnt].eqe->entry;
		if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
			token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
			spin_lock(&ehca_cq_idr_lock);
			eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
			if (!eqe_cache[eqe_cnt].cq) {
				spin_unlock(&ehca_cq_idr_lock);
				ehca_err(&shca->ib_device,
					 "Invalid eqe for non-existing cq "
					 "token=%x", token);
				continue;
			}
			eqe_cache[eqe_cnt].cq->nr_events++;
			spin_unlock(&ehca_cq_idr_lock);
		} else
			eqe_cache[eqe_cnt].cq = NULL;
		eqe_cnt++;
	} while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
	if (!eqe_cnt) {
		if (is_irq)
			ehca_dbg(&shca->ib_device,
				 "No eqe found for irq event");
		goto unlock_irq_spinlock;
	} else if (!is_irq)
		ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
	if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
		ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
	/* enable irq for new packets */
	for (i = 0; i < eqe_cnt; i++) {
		if (eq->eqe_cache[i].cq)
			reset_eq_pending(eq->eqe_cache[i].cq);
	}
	/* check eq */
	spin_lock(&eq->spinlock);
	eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
	spin_unlock(&eq->spinlock);
	/* call completion handler for cached eqes */
	for (i = 0; i < eqe_cnt; i++)
		if (eq->eqe_cache[i].cq) {
			if (ehca_scaling_code)
				queue_comp_task(eq->eqe_cache[i].cq);
			else {
				struct ehca_cq *cq = eq->eqe_cache[i].cq;
				comp_event_callback(cq);
				spin_lock(&ehca_cq_idr_lock);
				cq->nr_events--;
				if (!cq->nr_events)
					wake_up(&cq->wait_completion);
				spin_unlock(&ehca_cq_idr_lock);
			}
		} else {
			ehca_dbg(&shca->ib_device, "Got non completion event");
			parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
		}
	/* poll eq if not empty */
	if (eq_empty)
		goto unlock_irq_spinlock;
	do {
		struct ehca_eqe *eqe;
		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
		if (!eqe)
			break;
		process_eqe(shca, eqe);
	} while (1);

unlock_irq_spinlock:
	spin_unlock_irqrestore(&eq->irq_spinlock, flags);
}

void ehca_tasklet_eq(unsigned long data)
{
	ehca_process_eq((struct ehca_shca *)data, 1);
}

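/* Round-robin selection of the next online CPU for completion processing. */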
static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
{
	int cpu;
	unsigned long flags;

	WARN_ON_ONCE(!in_interrupt());
	if (ehca_debug_level)
		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");

	spin_lock_irqsave(&pool->last_cpu_lock, flags);
	cpu = next_cpu(pool->last_cpu, cpu_online_map);
	if (cpu == NR_CPUS)
		cpu = first_cpu(cpu_online_map);
	pool->last_cpu = cpu;
	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);

	return cpu;
}

static void __queue_comp_task(struct ehca_cq *__cq,
			      struct ehca_cpu_comp_task *cct)
{
	unsigned long flags;

	spin_lock_irqsave(&cct->task_lock, flags);
	spin_lock(&__cq->task_lock);

	if (__cq->nr_callbacks == 0) {
		__cq->nr_callbacks++;
		list_add_tail(&__cq->entry, &cct->cq_list);
		cct->cq_jobs++;
		wake_up(&cct->wait_queue);
	} else
		__cq->nr_callbacks++;

	spin_unlock(&__cq->task_lock);
	spin_unlock_irqrestore(&cct->task_lock, flags);
}

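/*
 * Queue a CQ to a per-CPU completion task; if the first choice already has
 * work pending, try the next online CPU once before queueing.
 */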
static void queue_comp_task(struct ehca_cq *__cq)
{
	int cpu_id;
	struct ehca_cpu_comp_task *cct;
	int cq_jobs;
	unsigned long flags;

	cpu_id = find_next_online_cpu(pool);
	BUG_ON(!cpu_online(cpu_id));

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
	BUG_ON(!cct);

	spin_lock_irqsave(&cct->task_lock, flags);
	cq_jobs = cct->cq_jobs;
	spin_unlock_irqrestore(&cct->task_lock, flags);
	if (cq_jobs > 0) {
		cpu_id = find_next_online_cpu(pool);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
		BUG_ON(!cct);
	}

	__queue_comp_task(__cq, cct);
}

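/*
 * Deliver all completions queued on this per-CPU task; the task lock is
 * dropped around the completion callback itself.
 */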
static void run_comp_task(struct ehca_cpu_comp_task *cct)
{
	struct ehca_cq *cq;
	unsigned long flags;

	spin_lock_irqsave(&cct->task_lock, flags);

	while (!list_empty(&cct->cq_list)) {
		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
		spin_unlock_irqrestore(&cct->task_lock, flags);
		comp_event_callback(cq);

		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
		cq->nr_events--;
		if (!cq->nr_events)
			wake_up(&cq->wait_completion);
		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);

		spin_lock_irqsave(&cct->task_lock, flags);
		spin_lock(&cq->task_lock);
		cq->nr_callbacks--;
		if (!cq->nr_callbacks) {
			list_del_init(cct->cq_list.next);
			cct->cq_jobs--;
		}
		spin_unlock(&cq->task_lock);
	}

	spin_unlock_irqrestore(&cct->task_lock, flags);
}

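/* Per-CPU completion kthread: sleep until CQs are queued, then process them. */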
static int comp_task(void *__cct)
{
	struct ehca_cpu_comp_task *cct = __cct;
	int cql_empty;
	DECLARE_WAITQUEUE(wait, current);

	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		add_wait_queue(&cct->wait_queue, &wait);

		spin_lock_irq(&cct->task_lock);
		cql_empty = list_empty(&cct->cq_list);
		spin_unlock_irq(&cct->task_lock);
		if (cql_empty)
			schedule();
		else
			__set_current_state(TASK_RUNNING);

		remove_wait_queue(&cct->wait_queue, &wait);

		spin_lock_irq(&cct->task_lock);
		cql_empty = list_empty(&cct->cq_list);
		spin_unlock_irq(&cct->task_lock);
		if (!cql_empty)
			run_comp_task(__cct);

		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}

static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
					    int cpu)
{
	struct ehca_cpu_comp_task *cct;

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
	spin_lock_init(&cct->task_lock);
	INIT_LIST_HEAD(&cct->cq_list);
	init_waitqueue_head(&cct->wait_queue);
	cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);

	return cct->task;
}

static void destroy_comp_task(struct ehca_comp_pool *pool,
			      int cpu)
{
	struct ehca_cpu_comp_task *cct;
	struct task_struct *task;
	unsigned long flags_cct;

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);

	spin_lock_irqsave(&cct->task_lock, flags_cct);

	task = cct->task;
	cct->task = NULL;
	cct->cq_jobs = 0;

	spin_unlock_irqrestore(&cct->task_lock, flags_cct);

	if (task)
		kthread_stop(task);
}

#ifdef CONFIG_HOTPLUG_CPU
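/*
 * Move the CQ work still queued on a (now offline) CPU over to the CPU that
 * is running the hotplug notifier.
 */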
static void take_over_work(struct ehca_comp_pool *pool,
			   int cpu)
{
	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
	LIST_HEAD(list);
	struct ehca_cq *cq;
	unsigned long flags_cct;

	spin_lock_irqsave(&cct->task_lock, flags_cct);

	list_splice_init(&cct->cq_list, &list);

	while (!list_empty(&list)) {
		/* the entries were spliced onto the local list above */
		cq = list_entry(list.next, struct ehca_cq, entry);

		list_del(&cq->entry);
		__queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
						  smp_processor_id()));
	}

	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
}

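/*
 * CPU hotplug notifier for the completion task pool: create and bind tasks
 * when CPUs come up, destroy them and take over their work when CPUs go away.
 */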
static int comp_pool_callback(struct notifier_block *nfb,
			      unsigned long action,
			      void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct ehca_cpu_comp_task *cct;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
		if (!create_comp_task(pool, cpu)) {
			ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
			return NOTIFY_BAD;
		}
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
		kthread_bind(cct->task, any_online_cpu(cpu_online_map));
		destroy_comp_task(pool, cpu);
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
		kthread_bind(cct->task, cpu);
		wake_up_process(cct->task);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
		destroy_comp_task(pool, cpu);
		take_over_work(pool, cpu);
		break;
	}

	return NOTIFY_OK;
}
#endif

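/*
 * Create the completion task pool (one kthread per online CPU) and register
 * the CPU hotplug notifier; only used if the scaling code is enabled.
 */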
int ehca_create_comp_pool(void)
{
	int cpu;
	struct task_struct *task;

	if (!ehca_scaling_code)
		return 0;

	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
	if (pool == NULL)
		return -ENOMEM;

	spin_lock_init(&pool->last_cpu_lock);
	pool->last_cpu = any_online_cpu(cpu_online_map);

	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
	if (pool->cpu_comp_tasks == NULL) {
		kfree(pool);
		return -EINVAL;
	}

	for_each_online_cpu(cpu) {
		task = create_comp_task(pool, cpu);
		if (task) {
			kthread_bind(task, cpu);
			wake_up_process(task);
		}
	}

#ifdef CONFIG_HOTPLUG_CPU
	comp_pool_callback_nb.notifier_call = comp_pool_callback;
	comp_pool_callback_nb.priority = 0;
	register_cpu_notifier(&comp_pool_callback_nb);
#endif

	printk(KERN_INFO "eHCA scaling code enabled\n");

	return 0;
}

void ehca_destroy_comp_pool(void)
{
	int i;

	if (!ehca_scaling_code)
		return;

#ifdef CONFIG_HOTPLUG_CPU
	unregister_cpu_notifier(&comp_pool_callback_nb);
#endif

	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_online(i))
			destroy_comp_task(pool, i);
	}
	free_percpu(pool->cpu_comp_tasks);
	kfree(pool);
}