/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  Functions for EQs, NEQs and interrupts
 *
 *  Authors: Heiko J Schick <schickhj@de.ibm.com>
 *           Khadija Souissi <souissi@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "ehca_classes.h"
#include "ehca_irq.h"
#include "ehca_iverbs.h"
#include "ehca_tools.h"
#include "hcp_if.h"
#include "hipz_fns.h"
#include "ipz_pt_fn.h"

#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1, 1)
#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8, 31)
#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM(2, 7)
#define EQE_CQ_NUMBER          EHCA_BMASK_IBM(8, 31)
#define EQE_QP_NUMBER          EHCA_BMASK_IBM(8, 31)
#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)

#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM(1, 1)
#define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2, 7)
#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8, 15)
#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)

#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
#define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0, 7)

static void queue_comp_task(struct ehca_cq *__cq);

static struct ehca_comp_pool *pool;
#ifdef CONFIG_HOTPLUG_CPU
static struct notifier_block comp_pool_callback_nb;
#endif

static inline void comp_event_callback(struct ehca_cq *cq)
{
	if (!cq->ib_cq.comp_handler)
		return;

	spin_lock(&cq->cb_lock);
	cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
	spin_unlock(&cq->cb_lock);

	return;
}

static void print_error_data(struct ehca_shca *shca, void *data,
			     u64 *rblock, int length)
{
	u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
	u64 resource = rblock[1];

	switch (type) {
	case 0x1: /* Queue Pair */
	{
		struct ehca_qp *qp = (struct ehca_qp *)data;

		/* only print error data if AER is set */
		if (rblock[6] == 0)
			return;

		ehca_err(&shca->ib_device,
			 "QP 0x%x (resource=%lx) has errors.",
			 qp->ib_qp.qp_num, resource);
		break;
	}
	case 0x4: /* Completion Queue */
	{
		struct ehca_cq *cq = (struct ehca_cq *)data;

		ehca_err(&shca->ib_device,
			 "CQ 0x%x (resource=%lx) has errors.",
			 cq->cq_number, resource);
		break;
	}
	default:
		ehca_err(&shca->ib_device,
			 "Unknown error type: %lx on %s.",
			 type, shca->ib_device.name);
		break;
	}

	ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
	ehca_err(&shca->ib_device, "EHCA ----- error data begin "
		 "---------------------------------------------------");
	ehca_dmp(rblock, length, "resource=%lx", resource);
	ehca_err(&shca->ib_device, "EHCA ----- error data end "
		 "----------------------------------------------------");

	return;
}

int ehca_error_data(struct ehca_shca *shca, void *data,
		    u64 resource)
{
	unsigned long ret;
	u64 *rblock;
	unsigned long block_count;

	rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
	if (!rblock) {
		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
		ret = -ENOMEM;
		goto error_data1;
	}

	/* rblock must be 4K aligned and should be 4K large */
	ret = hipz_h_error_data(shca->ipz_hca_handle,
				resource,
				rblock,
				&block_count);

	if (ret == H_R_STATE)
		ehca_err(&shca->ib_device,
			 "No error data is available: %lx.", resource);
	else if (ret == H_SUCCESS) {
		int length;

		length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);

		if (length > EHCA_PAGESIZE)
			length = EHCA_PAGESIZE;

		print_error_data(shca, data, rblock, length);
	} else
		ehca_err(&shca->ib_device,
			 "Error data could not be fetched: %lx", resource);

	ehca_free_fw_ctrlblock(rblock);

error_data1:
	return ret;
}

static void qp_event_callback(struct ehca_shca *shca,
			      u64 eqe,
			      enum ib_event_type event_type)
{
	struct ib_event event;
	struct ehca_qp *qp;
	unsigned long flags;
	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);

	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
	qp = idr_find(&ehca_qp_idr, token);
	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);

	if (!qp)
		return;

	ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);

	if (!qp->ib_qp.event_handler)
		return;

	event.device = &shca->ib_device;
	event.event = event_type;
	event.element.qp = &qp->ib_qp;

	qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);

	return;
}

static void cq_event_callback(struct ehca_shca *shca,
			      u64 eqe)
{
	struct ehca_cq *cq;
	unsigned long flags;
	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);

	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
	cq = idr_find(&ehca_cq_idr, token);
	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);

	if (!cq)
		return;

	ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);

	return;
}

static void parse_identifier(struct ehca_shca *shca, u64 eqe)
{
	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);

	switch (identifier) {
	case 0x02: /* path migrated */
		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
		break;
	case 0x03: /* communication established */
		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
		break;
	case 0x04: /* send queue drained */
		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
		break;
	case 0x05: /* QP error */
	case 0x06: /* QP error */
		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
		break;
	case 0x07: /* CQ error */
	case 0x08: /* CQ error */
		cq_event_callback(shca, eqe);
		break;
	case 0x09: /* MRMWPTE error */
		ehca_err(&shca->ib_device, "MRMWPTE error.");
		break;
	case 0x0A: /* port event */
		ehca_err(&shca->ib_device, "Port event.");
		break;
	case 0x0B: /* MR access error */
		ehca_err(&shca->ib_device, "MR access error.");
		break;
	case 0x0C: /* EQ error */
		ehca_err(&shca->ib_device, "EQ error.");
		break;
	case 0x0D: /* P/Q_Key mismatch */
		ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
		break;
	case 0x10: /* sampling complete */
		ehca_err(&shca->ib_device, "Sampling complete.");
		break;
	case 0x11: /* unaffiliated access error */
		ehca_err(&shca->ib_device, "Unaffiliated access error.");
		break;
	case 0x12: /* path migrating error */
		ehca_err(&shca->ib_device, "Path migration error.");
		break;
	case 0x13: /* interface trace stopped */
		ehca_err(&shca->ib_device, "Interface trace stopped.");
		break;
	case 0x14: /* first error capture info available */
	default:
		ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
			 identifier, shca->ib_device.name);
		break;
	}

	return;
}

static void parse_ec(struct ehca_shca *shca, u64 eqe)
{
	struct ib_event event;
	u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);

	switch (ec) {
	case 0x30: /* port availability change */
		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
			ehca_info(&shca->ib_device,
				  "port %x is active.", port);
			event.device = &shca->ib_device;
			event.event = IB_EVENT_PORT_ACTIVE;
			event.element.port_num = port;
			shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
			ib_dispatch_event(&event);
		} else {
			ehca_info(&shca->ib_device,
				  "port %x is inactive.", port);
			event.device = &shca->ib_device;
			event.event = IB_EVENT_PORT_ERR;
			event.element.port_num = port;
			shca->sport[port - 1].port_state = IB_PORT_DOWN;
			ib_dispatch_event(&event);
		}
		break;
	case 0x31:
		/* port configuration change
		 * disruptive change is caused by
		 * LID, PKEY or SM change
		 */
		ehca_warn(&shca->ib_device,
			  "disruptive port %x configuration change", port);

		ehca_info(&shca->ib_device,
			  "port %x is inactive.", port);
		event.device = &shca->ib_device;
		event.event = IB_EVENT_PORT_ERR;
		event.element.port_num = port;
		shca->sport[port - 1].port_state = IB_PORT_DOWN;
		ib_dispatch_event(&event);

		ehca_info(&shca->ib_device,
			  "port %x is active.", port);
		event.device = &shca->ib_device;
		event.event = IB_EVENT_PORT_ACTIVE;
		event.element.port_num = port;
		shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
		ib_dispatch_event(&event);
		break;
	case 0x32: /* adapter malfunction */
		ehca_err(&shca->ib_device, "Adapter malfunction.");
		break;
	case 0x33: /* trace stopped */
		ehca_err(&shca->ib_device, "Trace stopped.");
		break;
	default:
		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
			 ec, shca->ib_device.name);
		break;
	}

	return;
}

static inline void reset_eq_pending(struct ehca_cq *cq)
{
	u64 CQx_EP;
	struct h_galpa gal = cq->galpas.kernel;

	hipz_galpa_store_cq(gal, cqx_ep, 0x0);
	CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));

	return;
}

irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
{
	struct ehca_shca *shca = (struct ehca_shca *)dev_id;

	tasklet_hi_schedule(&shca->neq.interrupt_task);

	return IRQ_HANDLED;
}

void ehca_tasklet_neq(unsigned long data)
{
	struct ehca_shca *shca = (struct ehca_shca *)data;
	struct ehca_eqe *eqe;
	u64 ret;

	eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);

	while (eqe) {
		if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
			parse_ec(shca, eqe->entry);

		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
	}

	ret = hipz_h_reset_event(shca->ipz_hca_handle,
				 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);

	if (ret != H_SUCCESS)
		ehca_err(&shca->ib_device, "Can't clear notification events.");

	return;
}

irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
{
	struct ehca_shca *shca = (struct ehca_shca *)dev_id;

	tasklet_hi_schedule(&shca->eq.interrupt_task);

	return IRQ_HANDLED;
}

static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
{
	u64 eqe_value;
	u32 token;
	unsigned long flags;
	struct ehca_cq *cq;

	eqe_value = eqe->entry;
	ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
	if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
		ehca_dbg(&shca->ib_device, "Got completion event");
		token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
		cq = idr_find(&ehca_cq_idr, token);
		if (cq == NULL) {
			spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
			ehca_err(&shca->ib_device,
				 "Invalid eqe for non-existing cq token=%x",
				 token);
			return;
		}
		reset_eq_pending(cq);
		cq->nr_events++;
		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
		if (ehca_scaling_code)
			queue_comp_task(cq);
		else {
			comp_event_callback(cq);
			spin_lock_irqsave(&ehca_cq_idr_lock, flags);
			cq->nr_events--;
			if (!cq->nr_events)
				wake_up(&cq->wait_completion);
			spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
		}
	} else {
		ehca_dbg(&shca->ib_device, "Got non completion event");
		parse_identifier(shca, eqe_value);
	}
}

void ehca_process_eq(struct ehca_shca *shca, int is_irq)
{
	struct ehca_eq *eq = &shca->eq;
	struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
	u64 eqe_value;
	unsigned long flags;
	int eqe_cnt, i;
	int eq_empty = 0;

	spin_lock_irqsave(&eq->irq_spinlock, flags);
	if (is_irq) {
		const int max_query_cnt = 100;
		int query_cnt = 0;
		int int_state = 1;
		do {
			int_state = hipz_h_query_int_state(
				shca->ipz_hca_handle, eq->ist);
			query_cnt++;
			iosync();
		} while (int_state && query_cnt < max_query_cnt);
		if (unlikely(query_cnt == max_query_cnt))
			ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
				 int_state, query_cnt);
	}

	/* read out all eqes */
	eqe_cnt = 0;
	do {
		u32 token;
		eqe_cache[eqe_cnt].eqe =
			(struct ehca_eqe *)ehca_poll_eq(shca, eq);
		if (!eqe_cache[eqe_cnt].eqe)
			break;
		eqe_value = eqe_cache[eqe_cnt].eqe->entry;
		if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
			token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
			spin_lock(&ehca_cq_idr_lock);
			eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
			if (!eqe_cache[eqe_cnt].cq) {
				spin_unlock(&ehca_cq_idr_lock);
				ehca_err(&shca->ib_device,
					 "Invalid eqe for non-existing cq "
					 "token=%x", token);
				continue;
			}
			eqe_cache[eqe_cnt].cq->nr_events++;
			spin_unlock(&ehca_cq_idr_lock);
		} else
			eqe_cache[eqe_cnt].cq = NULL;
		eqe_cnt++;
	} while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
	if (!eqe_cnt) {
		if (is_irq)
			ehca_dbg(&shca->ib_device,
				 "No eqe found for irq event");
		goto unlock_irq_spinlock;
	} else if (!is_irq)
		ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
	if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
		ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
	/* enable irq for new packets */
	for (i = 0; i < eqe_cnt; i++) {
		if (eq->eqe_cache[i].cq)
			reset_eq_pending(eq->eqe_cache[i].cq);
	}
	/* check eq */
	spin_lock(&eq->spinlock);
	eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));
	spin_unlock(&eq->spinlock);
	/* call completion handler for cached eqes */
	for (i = 0; i < eqe_cnt; i++)
		if (eq->eqe_cache[i].cq) {
			if (ehca_scaling_code)
				queue_comp_task(eq->eqe_cache[i].cq);
			else {
				struct ehca_cq *cq = eq->eqe_cache[i].cq;
				comp_event_callback(cq);
				spin_lock(&ehca_cq_idr_lock);
				cq->nr_events--;
				if (!cq->nr_events)
					wake_up(&cq->wait_completion);
				spin_unlock(&ehca_cq_idr_lock);
			}
		} else {
			ehca_dbg(&shca->ib_device, "Got non completion event");
			parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
		}
	/* poll eq if not empty */
	if (eq_empty)
		goto unlock_irq_spinlock;
	do {
		struct ehca_eqe *eqe;
		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
		if (!eqe)
			break;
		process_eqe(shca, eqe);
	} while (1);

unlock_irq_spinlock:
	spin_unlock_irqrestore(&eq->irq_spinlock, flags);
}

void ehca_tasklet_eq(unsigned long data)
{
	ehca_process_eq((struct ehca_shca *)data, 1);
}

static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
{
	int cpu;
	unsigned long flags;

	WARN_ON_ONCE(!in_interrupt());
	if (ehca_debug_level)
		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");

	spin_lock_irqsave(&pool->last_cpu_lock, flags);
	cpu = next_cpu(pool->last_cpu, cpu_online_map);
	if (cpu == NR_CPUS)
		cpu = first_cpu(cpu_online_map);
	pool->last_cpu = cpu;
	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);

	return cpu;
}

static void __queue_comp_task(struct ehca_cq *__cq,
			      struct ehca_cpu_comp_task *cct)
{
	unsigned long flags;

	spin_lock_irqsave(&cct->task_lock, flags);
	spin_lock(&__cq->task_lock);

	if (__cq->nr_callbacks == 0) {
		__cq->nr_callbacks++;
		list_add_tail(&__cq->entry, &cct->cq_list);
		cct->cq_jobs++;
		wake_up(&cct->wait_queue);
	} else
		__cq->nr_callbacks++;

	spin_unlock(&__cq->task_lock);
	spin_unlock_irqrestore(&cct->task_lock, flags);
}

static void queue_comp_task(struct ehca_cq *__cq)
{
	int cpu_id;
	struct ehca_cpu_comp_task *cct;
	int cq_jobs;
	unsigned long flags;

	cpu_id = find_next_online_cpu(pool);
	BUG_ON(!cpu_online(cpu_id));

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
	BUG_ON(!cct);

	spin_lock_irqsave(&cct->task_lock, flags);
	cq_jobs = cct->cq_jobs;
	spin_unlock_irqrestore(&cct->task_lock, flags);
	if (cq_jobs > 0) {
		/* chosen CPU already has pending jobs, try the next one */
		cpu_id = find_next_online_cpu(pool);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
		BUG_ON(!cct);
	}

	__queue_comp_task(__cq, cct);
}

static void run_comp_task(struct ehca_cpu_comp_task *cct)
{
	struct ehca_cq *cq;
	unsigned long flags;

	spin_lock_irqsave(&cct->task_lock, flags);

	while (!list_empty(&cct->cq_list)) {
		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
		spin_unlock_irqrestore(&cct->task_lock, flags);
		comp_event_callback(cq);

		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
		cq->nr_events--;
		if (!cq->nr_events)
			wake_up(&cq->wait_completion);
		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);

		spin_lock_irqsave(&cct->task_lock, flags);
		spin_lock(&cq->task_lock);
		cq->nr_callbacks--;
		if (!cq->nr_callbacks) {
			list_del_init(cct->cq_list.next);
			cct->cq_jobs--;
		}
		spin_unlock(&cq->task_lock);
	}

	spin_unlock_irqrestore(&cct->task_lock, flags);
}

static int comp_task(void *__cct)
{
	struct ehca_cpu_comp_task *cct = __cct;
	int cql_empty;
	DECLARE_WAITQUEUE(wait, current);

	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		add_wait_queue(&cct->wait_queue, &wait);

		spin_lock_irq(&cct->task_lock);
		cql_empty = list_empty(&cct->cq_list);
		spin_unlock_irq(&cct->task_lock);
		if (cql_empty)
			schedule();
		else
			__set_current_state(TASK_RUNNING);

		remove_wait_queue(&cct->wait_queue, &wait);

		spin_lock_irq(&cct->task_lock);
		cql_empty = list_empty(&cct->cq_list);
		spin_unlock_irq(&cct->task_lock);
		if (!cql_empty)
			run_comp_task(__cct);

		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}

static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
					    int cpu)
{
	struct ehca_cpu_comp_task *cct;

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
	spin_lock_init(&cct->task_lock);
	INIT_LIST_HEAD(&cct->cq_list);
	init_waitqueue_head(&cct->wait_queue);
	cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);

	return cct->task;
}

static void destroy_comp_task(struct ehca_comp_pool *pool,
			      int cpu)
{
	struct ehca_cpu_comp_task *cct;
	struct task_struct *task;
	unsigned long flags_cct;

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);

	spin_lock_irqsave(&cct->task_lock, flags_cct);

	task = cct->task;
	cct->task = NULL;
	cct->cq_jobs = 0;

	spin_unlock_irqrestore(&cct->task_lock, flags_cct);

	if (task)
		kthread_stop(task);
}

#ifdef CONFIG_HOTPLUG_CPU
static void take_over_work(struct ehca_comp_pool *pool,
			   int cpu)
{
	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
	LIST_HEAD(list);
	struct ehca_cq *cq;
	unsigned long flags_cct;

	spin_lock_irqsave(&cct->task_lock, flags_cct);

	list_splice_init(&cct->cq_list, &list);

	while (!list_empty(&list)) {
		/* take entries from the spliced-off list, not from the
		 * (now empty) per-CPU list
		 */
		cq = list_entry(list.next, struct ehca_cq, entry);

		list_del(&cq->entry);
		__queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
						  smp_processor_id()));
	}

	spin_unlock_irqrestore(&cct->task_lock, flags_cct);

}

static int comp_pool_callback(struct notifier_block *nfb,
			      unsigned long action,
			      void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct ehca_cpu_comp_task *cct;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
		if (!create_comp_task(pool, cpu)) {
			ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
			return NOTIFY_BAD;
		}
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
		kthread_bind(cct->task, any_online_cpu(cpu_online_map));
		destroy_comp_task(pool, cpu);
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
		kthread_bind(cct->task, cpu);
		wake_up_process(cct->task);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
		destroy_comp_task(pool, cpu);
		take_over_work(pool, cpu);
		break;
	}

	return NOTIFY_OK;
}
#endif

int ehca_create_comp_pool(void)
{
	int cpu;
	struct task_struct *task;

	if (!ehca_scaling_code)
		return 0;

	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
	if (pool == NULL)
		return -ENOMEM;

	spin_lock_init(&pool->last_cpu_lock);
	pool->last_cpu = any_online_cpu(cpu_online_map);

	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
	if (pool->cpu_comp_tasks == NULL) {
		kfree(pool);
		return -EINVAL;
	}

	for_each_online_cpu(cpu) {
		task = create_comp_task(pool, cpu);
		if (task) {
			kthread_bind(task, cpu);
			wake_up_process(task);
		}
	}

#ifdef CONFIG_HOTPLUG_CPU
	comp_pool_callback_nb.notifier_call = comp_pool_callback;
	comp_pool_callback_nb.priority = 0;
	register_cpu_notifier(&comp_pool_callback_nb);
#endif

	printk(KERN_INFO "eHCA scaling code enabled\n");

	return 0;
}

void ehca_destroy_comp_pool(void)
{
	int i;

	if (!ehca_scaling_code)
		return;

#ifdef CONFIG_HOTPLUG_CPU
	unregister_cpu_notifier(&comp_pool_callback_nb);
#endif

	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_online(i))
			destroy_comp_task(pool, i);
	}
	free_percpu(pool->cpu_comp_tasks);
	kfree(pool);
}