1// SPDX-License-Identifier: GPL-2.0 2 3/* 4 * Copyright 2016-2021 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 * 7 */ 8 9#define pr_fmt(fmt) "habanalabs: " fmt 10 11#include "habanalabs.h" 12#include "../include/hw_ip/pci/pci_general.h" 13 14#include <linux/pci.h> 15#include <linux/module.h> 16#include <linux/vmalloc.h> 17#include <linux/version.h> 18 19#include <drm/drm_accel.h> 20#include <drm/drm_drv.h> 21#include <drm/drm_ioctl.h> 22 23#define CREATE_TRACE_POINTS 24#include <trace/events/habanalabs.h> 25 26#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team" 27 28#define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators" 29 30MODULE_AUTHOR(HL_DRIVER_AUTHOR); 31MODULE_DESCRIPTION(HL_DRIVER_DESC); 32MODULE_LICENSE("GPL v2"); 33 34static int hl_major; 35static DEFINE_IDR(hl_devs_idr); 36static DEFINE_MUTEX(hl_devs_idr_lock); 37 38#define HL_DEFAULT_TIMEOUT_LOCKED 30 /* 30 seconds */ 39#define GAUDI_DEFAULT_TIMEOUT_LOCKED 600 /* 10 minutes */ 40 41static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED; 42static int reset_on_lockup = 1; 43static int memory_scrub; 44static ulong boot_error_status_mask = ULONG_MAX; 45 46module_param(timeout_locked, int, 0444); 47MODULE_PARM_DESC(timeout_locked, 48 "Device lockup timeout in seconds (0 = disabled, default 30s)"); 49 50module_param(reset_on_lockup, int, 0444); 51MODULE_PARM_DESC(reset_on_lockup, 52 "Do device reset on lockup (0 = no, 1 = yes, default yes)"); 53 54module_param(memory_scrub, int, 0444); 55MODULE_PARM_DESC(memory_scrub, 56 "Scrub device memory in various states (0 = no, 1 = yes, default no)"); 57 58module_param(boot_error_status_mask, ulong, 0444); 59MODULE_PARM_DESC(boot_error_status_mask, 60 "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)"); 61 62#define PCI_IDS_GOYA 0x0001 63#define PCI_IDS_GAUDI 0x1000 64#define PCI_IDS_GAUDI_SEC 0x1010 65 66#define PCI_IDS_GAUDI2 0x1020 67 68static const struct pci_device_id ids[] = { 69 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, 70 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, 71 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), }, 72 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), }, 73 { 0, } 74}; 75MODULE_DEVICE_TABLE(pci, ids); 76 77static const struct drm_ioctl_desc hl_drm_ioctls[] = { 78 DRM_IOCTL_DEF_DRV(HL_INFO, hl_info_ioctl, 0), 79 DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0), 80 DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0), 81 DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0), 82 DRM_IOCTL_DEF_DRV(HL_MEMORY, hl_mem_ioctl, 0), 83 DRM_IOCTL_DEF_DRV(HL_DEBUG, hl_debug_ioctl, 0), 84}; 85 86static const struct file_operations hl_fops = { 87 .owner = THIS_MODULE, 88 .open = accel_open, 89 .release = drm_release, 90 .unlocked_ioctl = drm_ioctl, 91 .compat_ioctl = drm_compat_ioctl, 92 .llseek = noop_llseek, 93 .mmap = hl_mmap 94}; 95 96static const struct drm_driver hl_driver = { 97 .driver_features = DRIVER_COMPUTE_ACCEL, 98 99 .name = HL_NAME, 100 .desc = HL_DRIVER_DESC, 101 .major = LINUX_VERSION_MAJOR, 102 .minor = LINUX_VERSION_PATCHLEVEL, 103 .patchlevel = LINUX_VERSION_SUBLEVEL, 104 .date = "20190505", 105 106 .fops = &hl_fops, 107 .open = hl_device_open, 108 .postclose = hl_device_release, 109 .ioctls = hl_drm_ioctls, 110 .num_ioctls = ARRAY_SIZE(hl_drm_ioctls) 111}; 112 113/* 114 * get_asic_type - translate device id to asic type 115 * 116 * @hdev: pointer to habanalabs device structure. 117 * 118 * Translate device id and revision id to asic type. 119 * In case of unidentified device, return -1 120 */ 121static enum hl_asic_type get_asic_type(struct hl_device *hdev) 122{ 123 struct pci_dev *pdev = hdev->pdev; 124 enum hl_asic_type asic_type = ASIC_INVALID; 125 126 switch (pdev->device) { 127 case PCI_IDS_GOYA: 128 asic_type = ASIC_GOYA; 129 break; 130 case PCI_IDS_GAUDI: 131 asic_type = ASIC_GAUDI; 132 break; 133 case PCI_IDS_GAUDI_SEC: 134 asic_type = ASIC_GAUDI_SEC; 135 break; 136 case PCI_IDS_GAUDI2: 137 switch (pdev->revision) { 138 case REV_ID_A: 139 asic_type = ASIC_GAUDI2; 140 break; 141 case REV_ID_B: 142 asic_type = ASIC_GAUDI2B; 143 break; 144 case REV_ID_C: 145 asic_type = ASIC_GAUDI2C; 146 break; 147 default: 148 break; 149 } 150 break; 151 default: 152 break; 153 } 154 155 return asic_type; 156} 157 158static bool is_asic_secured(enum hl_asic_type asic_type) 159{ 160 switch (asic_type) { 161 case ASIC_GAUDI_SEC: 162 return true; 163 default: 164 return false; 165 } 166} 167 168/* 169 * hl_device_open() - open function for habanalabs device. 170 * @ddev: pointer to DRM device structure. 171 * @file: pointer to DRM file private data structure. 172 * 173 * Called when process opens an habanalabs device. 174 */ 175int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv) 176{ 177 struct hl_device *hdev = to_hl_device(ddev); 178 enum hl_device_status status; 179 struct hl_fpriv *hpriv; 180 int rc; 181 182 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); 183 if (!hpriv) 184 return -ENOMEM; 185 186 hpriv->hdev = hdev; 187 mutex_init(&hpriv->notifier_event.lock); 188 mutex_init(&hpriv->restore_phase_mutex); 189 mutex_init(&hpriv->ctx_lock); 190 kref_init(&hpriv->refcount); 191 192 hl_ctx_mgr_init(&hpriv->ctx_mgr); 193 hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr); 194 195 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); 196 197 mutex_lock(&hdev->fpriv_list_lock); 198 199 if (!hl_device_operational(hdev, &status)) { 200 dev_dbg_ratelimited(hdev->dev, 201 "Can't open %s because it is %s\n", 202 dev_name(hdev->dev), hdev->status[status]); 203 204 if (status == HL_DEVICE_STATUS_IN_RESET || 205 status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE) 206 rc = -EAGAIN; 207 else 208 rc = -EPERM; 209 210 goto out_err; 211 } 212 213 if (hdev->is_in_dram_scrub) { 214 dev_dbg_ratelimited(hdev->dev, 215 "Can't open %s during dram scrub\n", 216 dev_name(hdev->dev)); 217 rc = -EAGAIN; 218 goto out_err; 219 } 220 221 if (hdev->compute_ctx_in_release) { 222 dev_dbg_ratelimited(hdev->dev, 223 "Can't open %s because another user is still releasing it\n", 224 dev_name(hdev->dev)); 225 rc = -EAGAIN; 226 goto out_err; 227 } 228 229 if (hdev->is_compute_ctx_active) { 230 dev_dbg_ratelimited(hdev->dev, 231 "Can't open %s because another user is working on it\n", 232 dev_name(hdev->dev)); 233 rc = -EBUSY; 234 goto out_err; 235 } 236 237 rc = hl_ctx_create(hdev, hpriv); 238 if (rc) { 239 dev_err(hdev->dev, "Failed to create context %d\n", rc); 240 goto out_err; 241 } 242 243 list_add(&hpriv->dev_node, &hdev->fpriv_list); 244 mutex_unlock(&hdev->fpriv_list_lock); 245 246 hdev->asic_funcs->send_device_activity(hdev, true); 247 248 hl_debugfs_add_file(hpriv); 249 250 hl_enable_err_info_capture(&hdev->captured_err_info); 251 252 hdev->open_counter++; 253 hdev->last_successful_open_jif = jiffies; 254 hdev->last_successful_open_ktime = ktime_get(); 255 256 file_priv->driver_priv = hpriv; 257 hpriv->file_priv = file_priv; 258 259 return 0; 260 261out_err: 262 mutex_unlock(&hdev->fpriv_list_lock); 263 hl_mem_mgr_fini(&hpriv->mem_mgr); 264 hl_mem_mgr_idr_destroy(&hpriv->mem_mgr); 265 hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); 266 mutex_destroy(&hpriv->ctx_lock); 267 mutex_destroy(&hpriv->restore_phase_mutex); 268 mutex_destroy(&hpriv->notifier_event.lock); 269 put_pid(hpriv->taskpid); 270 271 kfree(hpriv); 272 273 return rc; 274} 275 276int hl_device_open_ctrl(struct inode *inode, struct file *filp) 277{ 278 struct hl_device *hdev; 279 struct hl_fpriv *hpriv; 280 int rc; 281 282 mutex_lock(&hl_devs_idr_lock); 283 hdev = idr_find(&hl_devs_idr, iminor(inode)); 284 mutex_unlock(&hl_devs_idr_lock); 285 286 if (!hdev) { 287 pr_err("Couldn't find device %d:%d\n", 288 imajor(inode), iminor(inode)); 289 return -ENXIO; 290 } 291 292 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); 293 if (!hpriv) 294 return -ENOMEM; 295 296 /* Prevent other routines from reading partial hpriv data by 297 * initializing hpriv fields before inserting it to the list 298 */ 299 hpriv->hdev = hdev; 300 filp->private_data = hpriv; 301 302 nonseekable_open(inode, filp); 303 304 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); 305 306 mutex_lock(&hdev->fpriv_ctrl_list_lock); 307 308 if (!hl_ctrl_device_operational(hdev, NULL)) { 309 dev_dbg_ratelimited(hdev->dev_ctrl, 310 "Can't open %s because it is disabled\n", 311 dev_name(hdev->dev_ctrl)); 312 rc = -EPERM; 313 goto out_err; 314 } 315 316 list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list); 317 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 318 319 return 0; 320 321out_err: 322 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 323 filp->private_data = NULL; 324 put_pid(hpriv->taskpid); 325 326 kfree(hpriv); 327 328 return rc; 329} 330 331static void set_driver_behavior_per_device(struct hl_device *hdev) 332{ 333 hdev->nic_ports_mask = 0; 334 hdev->fw_components = FW_TYPE_ALL_TYPES; 335 hdev->cpu_queues_enable = 1; 336 hdev->pldm = 0; 337 hdev->hard_reset_on_fw_events = 1; 338 hdev->bmc_enable = 1; 339 hdev->reset_on_preboot_fail = 1; 340 hdev->heartbeat = 1; 341} 342 343static void copy_kernel_module_params_to_device(struct hl_device *hdev) 344{ 345 hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); 346 347 hdev->major = hl_major; 348 hdev->memory_scrub = memory_scrub; 349 hdev->reset_on_lockup = reset_on_lockup; 350 hdev->boot_error_status_mask = boot_error_status_mask; 351} 352 353static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout) 354{ 355 switch (hdev->asic_type) { 356 case ASIC_GAUDI: 357 case ASIC_GAUDI_SEC: 358 /* If user didn't request a different timeout than the default one, we have 359 * a different default timeout for Gaudi 360 */ 361 if (timeout == HL_DEFAULT_TIMEOUT_LOCKED) 362 hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED * 363 MSEC_PER_SEC); 364 365 hdev->reset_upon_device_release = 0; 366 break; 367 368 case ASIC_GOYA: 369 hdev->reset_upon_device_release = 0; 370 break; 371 372 default: 373 hdev->reset_upon_device_release = 1; 374 break; 375 } 376} 377 378static int fixup_device_params(struct hl_device *hdev) 379{ 380 int tmp_timeout; 381 382 tmp_timeout = timeout_locked; 383 384 hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; 385 hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; 386 387 if (tmp_timeout) 388 hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC); 389 else 390 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; 391 392 hdev->stop_on_err = true; 393 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; 394 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; 395 396 /* Enable only after the initialization of the device */ 397 hdev->disabled = true; 398 399 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) && 400 (hdev->fw_components & ~FW_TYPE_PREBOOT_CPU)) { 401 pr_err("Preboot must be set along with other components"); 402 return -EINVAL; 403 } 404 405 /* If CPU queues not enabled, no way to do heartbeat */ 406 if (!hdev->cpu_queues_enable) 407 hdev->heartbeat = 0; 408 fixup_device_params_per_asic(hdev, tmp_timeout); 409 410 return 0; 411} 412 413static int allocate_device_id(struct hl_device *hdev) 414{ 415 int id; 416 417 mutex_lock(&hl_devs_idr_lock); 418 id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL); 419 mutex_unlock(&hl_devs_idr_lock); 420 421 if (id < 0) { 422 if (id == -ENOSPC) 423 pr_err("too many devices in the system\n"); 424 return -EBUSY; 425 } 426 427 hdev->id = id; 428 429 /* 430 * Firstly initialized with the internal device ID. 431 * Will be updated later after the DRM device registration to hold the minor ID. 432 */ 433 hdev->cdev_idx = hdev->id; 434 435 return 0; 436} 437 438/** 439 * create_hdev - create habanalabs device instance 440 * 441 * @dev: will hold the pointer to the new habanalabs device structure 442 * @pdev: pointer to the pci device 443 * 444 * Allocate memory for habanalabs device and initialize basic fields 445 * Identify the ASIC type 446 * Allocate ID (minor) for the device (only for real devices) 447 */ 448static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) 449{ 450 struct hl_device *hdev; 451 int rc; 452 453 *dev = NULL; 454 455 hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm); 456 if (IS_ERR(hdev)) 457 return PTR_ERR(hdev); 458 459 hdev->dev = hdev->drm.dev; 460 461 /* Will be NULL in case of simulator device */ 462 hdev->pdev = pdev; 463 464 /* Assign status description string */ 465 strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX); 466 strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); 467 strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); 468 strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); 469 strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], 470 "in device creation", HL_STR_MAX); 471 strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], 472 "in reset after device release", HL_STR_MAX); 473 474 475 /* First, we must find out which ASIC are we handling. This is needed 476 * to configure the behavior of the driver (kernel parameters) 477 */ 478 hdev->asic_type = get_asic_type(hdev); 479 if (hdev->asic_type == ASIC_INVALID) { 480 dev_err(&pdev->dev, "Unsupported ASIC\n"); 481 rc = -ENODEV; 482 goto out_err; 483 } 484 485 copy_kernel_module_params_to_device(hdev); 486 487 set_driver_behavior_per_device(hdev); 488 489 fixup_device_params(hdev); 490 491 rc = allocate_device_id(hdev); 492 if (rc) 493 goto out_err; 494 495 *dev = hdev; 496 497 return 0; 498 499out_err: 500 return rc; 501} 502 503/* 504 * destroy_hdev - destroy habanalabs device instance 505 * 506 * @dev: pointer to the habanalabs device structure 507 * 508 */ 509static void destroy_hdev(struct hl_device *hdev) 510{ 511 /* Remove device from the device list */ 512 mutex_lock(&hl_devs_idr_lock); 513 idr_remove(&hl_devs_idr, hdev->id); 514 mutex_unlock(&hl_devs_idr_lock); 515 516} 517 518static int hl_pmops_suspend(struct device *dev) 519{ 520 struct hl_device *hdev = dev_get_drvdata(dev); 521 522 pr_debug("Going to suspend PCI device\n"); 523 524 if (!hdev) { 525 pr_err("device pointer is NULL in suspend\n"); 526 return 0; 527 } 528 529 return hl_device_suspend(hdev); 530} 531 532static int hl_pmops_resume(struct device *dev) 533{ 534 struct hl_device *hdev = dev_get_drvdata(dev); 535 536 pr_debug("Going to resume PCI device\n"); 537 538 if (!hdev) { 539 pr_err("device pointer is NULL in resume\n"); 540 return 0; 541 } 542 543 return hl_device_resume(hdev); 544} 545 546/** 547 * hl_pci_probe - probe PCI habanalabs devices 548 * 549 * @pdev: pointer to pci device 550 * @id: pointer to pci device id structure 551 * 552 * Standard PCI probe function for habanalabs device. 553 * Create a new habanalabs device and initialize it according to the 554 * device's type 555 */ 556static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 557{ 558 struct hl_device *hdev; 559 int rc; 560 561 dev_info(&pdev->dev, HL_NAME 562 " device found [%04x:%04x] (rev %x)\n", 563 (int)pdev->vendor, (int)pdev->device, (int)pdev->revision); 564 565 rc = create_hdev(&hdev, pdev); 566 if (rc) 567 return rc; 568 569 pci_set_drvdata(pdev, hdev); 570 571 rc = hl_device_init(hdev); 572 if (rc) { 573 dev_err(&pdev->dev, "Fatal error during habanalabs device init\n"); 574 rc = -ENODEV; 575 goto disable_device; 576 } 577 578 return 0; 579 580disable_device: 581 pci_set_drvdata(pdev, NULL); 582 destroy_hdev(hdev); 583 584 return rc; 585} 586 587/* 588 * hl_pci_remove - remove PCI habanalabs devices 589 * 590 * @pdev: pointer to pci device 591 * 592 * Standard PCI remove function for habanalabs device 593 */ 594static void hl_pci_remove(struct pci_dev *pdev) 595{ 596 struct hl_device *hdev; 597 598 hdev = pci_get_drvdata(pdev); 599 if (!hdev) 600 return; 601 602 hl_device_fini(hdev); 603 pci_set_drvdata(pdev, NULL); 604 destroy_hdev(hdev); 605} 606 607/** 608 * hl_pci_err_detected - a PCI bus error detected on this device 609 * 610 * @pdev: pointer to pci device 611 * @state: PCI error type 612 * 613 * Called by the PCI subsystem whenever a non-correctable 614 * PCI bus error is detected 615 */ 616static pci_ers_result_t 617hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) 618{ 619 struct hl_device *hdev = pci_get_drvdata(pdev); 620 enum pci_ers_result result; 621 622 switch (state) { 623 case pci_channel_io_normal: 624 dev_warn(hdev->dev, "PCI normal state error detected\n"); 625 return PCI_ERS_RESULT_CAN_RECOVER; 626 627 case pci_channel_io_frozen: 628 dev_warn(hdev->dev, "PCI frozen state error detected\n"); 629 result = PCI_ERS_RESULT_NEED_RESET; 630 break; 631 632 case pci_channel_io_perm_failure: 633 dev_warn(hdev->dev, "PCI failure state error detected\n"); 634 result = PCI_ERS_RESULT_DISCONNECT; 635 break; 636 637 default: 638 result = PCI_ERS_RESULT_NONE; 639 } 640 641 hdev->asic_funcs->halt_engines(hdev, true, false); 642 643 return result; 644} 645 646/** 647 * hl_pci_err_resume - resume after a PCI slot reset 648 * 649 * @pdev: pointer to pci device 650 * 651 */ 652static void hl_pci_err_resume(struct pci_dev *pdev) 653{ 654 struct hl_device *hdev = pci_get_drvdata(pdev); 655 656 dev_warn(hdev->dev, "Resuming device after PCI slot reset\n"); 657 hl_device_resume(hdev); 658} 659 660/** 661 * hl_pci_err_slot_reset - a PCI slot reset has just happened 662 * 663 * @pdev: pointer to pci device 664 * 665 * Determine if the driver can recover from the PCI slot reset 666 */ 667static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev) 668{ 669 struct hl_device *hdev = pci_get_drvdata(pdev); 670 671 dev_warn(hdev->dev, "PCI slot reset detected\n"); 672 673 return PCI_ERS_RESULT_RECOVERED; 674} 675 676static void hl_pci_reset_prepare(struct pci_dev *pdev) 677{ 678 struct hl_device *hdev; 679 680 hdev = pci_get_drvdata(pdev); 681 if (!hdev) 682 return; 683 684 hdev->disabled = true; 685} 686 687static void hl_pci_reset_done(struct pci_dev *pdev) 688{ 689 struct hl_device *hdev; 690 u32 flags; 691 692 hdev = pci_get_drvdata(pdev); 693 if (!hdev) 694 return; 695 696 /* 697 * Schedule a thread to trigger hard reset. 698 * The reason for this handler, is for rare cases where the driver is up 699 * and FLR occurs. This is valid only when working with no VM, so FW handles FLR 700 * and resets the device. FW will go back preboot stage, so driver needs to perform 701 * hard reset in order to load FW fit again. 702 */ 703 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW; 704 705 hl_device_reset(hdev, flags); 706} 707 708static const struct dev_pm_ops hl_pm_ops = { 709 .suspend = hl_pmops_suspend, 710 .resume = hl_pmops_resume, 711}; 712 713static const struct pci_error_handlers hl_pci_err_handler = { 714 .error_detected = hl_pci_err_detected, 715 .slot_reset = hl_pci_err_slot_reset, 716 .resume = hl_pci_err_resume, 717 .reset_prepare = hl_pci_reset_prepare, 718 .reset_done = hl_pci_reset_done, 719}; 720 721static struct pci_driver hl_pci_driver = { 722 .name = HL_NAME, 723 .id_table = ids, 724 .probe = hl_pci_probe, 725 .remove = hl_pci_remove, 726 .shutdown = hl_pci_remove, 727 .driver = { 728 .name = HL_NAME, 729 .pm = &hl_pm_ops, 730 .probe_type = PROBE_PREFER_ASYNCHRONOUS, 731 }, 732 .err_handler = &hl_pci_err_handler, 733}; 734 735/* 736 * hl_init - Initialize the habanalabs kernel driver 737 */ 738static int __init hl_init(void) 739{ 740 int rc; 741 dev_t dev; 742 743 pr_info("loading driver\n"); 744 745 rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME); 746 if (rc < 0) { 747 pr_err("unable to get major\n"); 748 return rc; 749 } 750 751 hl_major = MAJOR(dev); 752 753 rc = pci_register_driver(&hl_pci_driver); 754 if (rc) { 755 pr_err("failed to register pci device\n"); 756 goto remove_major; 757 } 758 759 pr_debug("driver loaded\n"); 760 761 return 0; 762 763remove_major: 764 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); 765 return rc; 766} 767 768/* 769 * hl_exit - Release all resources of the habanalabs kernel driver 770 */ 771static void __exit hl_exit(void) 772{ 773 pci_unregister_driver(&hl_pci_driver); 774 775 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); 776 777 idr_destroy(&hl_devs_idr); 778 779 pr_debug("driver removed\n"); 780} 781 782module_init(hl_init); 783module_exit(hl_exit); 784