// SPDX-License-Identifier: GPL-2.0+ // Copyright 2017 IBM Corp. #include #include #include #include "pci.h" #define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full #define PNV_OCXL_ACTAG_MAX 64 /* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */ #define PNV_OCXL_PASID_BITS 15 #define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1) #define AFU_PRESENT (1 << 31) #define AFU_INDEX_MASK 0x3F000000 #define AFU_INDEX_SHIFT 24 #define ACTAG_MASK 0xFFF struct actag_range { u16 start; u16 count; }; struct npu_link { struct list_head list; int domain; int bus; int dev; u16 fn_desired_actags[8]; struct actag_range fn_actags[8]; bool assignment_done; }; static struct list_head links_list = LIST_HEAD_INIT(links_list); static DEFINE_MUTEX(links_list_lock); /* * opencapi actags handling: * * When sending commands, the opencapi device references the memory * context it's targeting with an 'actag', which is really an alias * for a (BDF, pasid) combination. When it receives a command, the NPU * must do a lookup of the actag to identify the memory context. The * hardware supports a finite number of actags per link (64 for * POWER9). * * The device can carry multiple functions, and each function can have * multiple AFUs. Each AFU advertises in its config space the number * of desired actags. The host must configure in the config space of * the AFU how many actags the AFU is really allowed to use (which can * be less than what the AFU desires). * * When a PCI function is probed by the driver, it has no visibility * about the other PCI functions and how many actags they'd like, * which makes it impossible to distribute actags fairly among AFUs. * * Unfortunately, the only way to know how many actags a function * desires is by looking at the data for each AFU in the config space * and add them up. Similarly, the only way to know how many actags * all the functions of the physical device desire is by adding the * previously computed function counts. Then we can match that against * what the hardware supports. * * To get a comprehensive view, we use a 'pci fixup': at the end of * PCI enumeration, each function counts how many actags its AFUs * desire and we save it in a 'npu_link' structure, shared between all * the PCI functions of a same device. Therefore, when the first * function is probed by the driver, we can get an idea of the total * count of desired actags for the device, and assign the actags to * the AFUs, by pro-rating if needed. */ static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos) { int vsec = pos; u16 vendor, id; while ((vsec = pci_find_next_ext_capability(dev, vsec, OCXL_EXT_CAP_ID_DVSEC))) { pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, &vendor); pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) return vsec; } return 0; } static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) { int vsec = 0; u8 idx; while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, vsec))) { pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, &idx); if (idx == afu_idx) return vsec; } return 0; } static int get_max_afu_index(struct pci_dev *dev, int *afu_idx) { int pos; u32 val; pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM, OCXL_DVSEC_FUNC_ID); if (!pos) return -ESRCH; pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); if (val & AFU_PRESENT) *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT; else *afu_idx = -1; return 0; } static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag) { int pos; u16 actag_sup; pos = find_dvsec_afu_ctrl(dev, afu_idx); if (!pos) return -ESRCH; pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &actag_sup); *actag = actag_sup & ACTAG_MASK; return 0; } static struct npu_link *find_link(struct pci_dev *dev) { struct npu_link *link; list_for_each_entry(link, &links_list, list) { /* The functions of a device all share the same link */ if (link->domain == pci_domain_nr(dev->bus) && link->bus == dev->bus->number && link->dev == PCI_SLOT(dev->devfn)) { return link; } } /* link doesn't exist yet. Allocate one */ link = kzalloc(sizeof(struct npu_link), GFP_KERNEL); if (!link) return NULL; link->domain = pci_domain_nr(dev->bus); link->bus = dev->bus->number; link->dev = PCI_SLOT(dev->devfn); list_add(&link->list, &links_list); return link; } static void pnv_ocxl_fixup_actag(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; struct npu_link *link; int rc, afu_idx = -1, i, actag; if (!machine_is(powernv)) return; if (phb->type != PNV_PHB_NPU_OCAPI) return; mutex_lock(&links_list_lock); link = find_link(dev); if (!link) { dev_warn(&dev->dev, "couldn't update actag information\n"); mutex_unlock(&links_list_lock); return; } /* * Check how many actags are desired for the AFUs under that * function and add it to the count for the link */ rc = get_max_afu_index(dev, &afu_idx); if (rc) { /* Most likely an invalid config space */ dev_dbg(&dev->dev, "couldn't find AFU information\n"); afu_idx = -1; } link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0; for (i = 0; i <= afu_idx; i++) { /* * AFU index 'holes' are allowed. So don't fail if we * can't read the actag info for an index */ rc = get_actag_count(dev, i, &actag); if (rc) continue; link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag; } dev_dbg(&dev->dev, "total actags for function: %d\n", link->fn_desired_actags[PCI_FUNC(dev->devfn)]); mutex_unlock(&links_list_lock); } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag); static u16 assign_fn_actags(u16 desired, u16 total) { u16 count; if (total <= PNV_OCXL_ACTAG_MAX) count = desired; else count = PNV_OCXL_ACTAG_MAX * desired / total; return count; } static void assign_actags(struct npu_link *link) { u16 actag_count, range_start = 0, total_desired = 0; int i; for (i = 0; i < 8; i++) total_desired += link->fn_desired_actags[i]; for (i = 0; i < 8; i++) { if (link->fn_desired_actags[i]) { actag_count = assign_fn_actags( link->fn_desired_actags[i], total_desired); link->fn_actags[i].start = range_start; link->fn_actags[i].count = actag_count; range_start += actag_count; WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX); } pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n", link->domain, link->bus, link->dev, i, link->fn_actags[i].start, link->fn_actags[i].count, link->fn_desired_actags[i]); } link->assignment_done = true; } int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported) { struct npu_link *link; mutex_lock(&links_list_lock); link = find_link(dev); if (!link) { dev_err(&dev->dev, "actag information not found\n"); mutex_unlock(&links_list_lock); return -ENODEV; } /* * On p9, we only have 64 actags per link, so they must be * shared by all the functions of the same adapter. We counted * the desired actag counts during PCI enumeration, so that we * can allocate a pro-rated number of actags to each function. */ if (!link->assignment_done) assign_actags(link); *base = link->fn_actags[PCI_FUNC(dev->devfn)].start; *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count; *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)]; mutex_unlock(&links_list_lock); return 0; } EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag); int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count) { struct npu_link *link; int i, rc = -EINVAL; /* * The number of PASIDs (process address space ID) which can * be used by a function depends on how many functions exist * on the device. The NPU needs to be configured to know how * many bits are available to PASIDs and how many are to be * used by the function BDF identifier. * * We only support one AFU-carrying function for now. */ mutex_lock(&links_list_lock); link = find_link(dev); if (!link) { dev_err(&dev->dev, "actag information not found\n"); mutex_unlock(&links_list_lock); return -ENODEV; } for (i = 0; i < 8; i++) if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) { *count = PNV_OCXL_PASID_MAX; rc = 0; break; } mutex_unlock(&links_list_lock); dev_dbg(&dev->dev, "%d PASIDs available for function\n", rc ? 0 : *count); return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count); static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf) { int shift, idx; WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE); idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2; shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2)); buf[idx] |= rate << shift; } int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, char *rate_buf, int rate_buf_size) { if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) return -EINVAL; /* * The TL capabilities are a characteristic of the NPU, so * we go with hard-coded values. * * The receiving rate of each template is encoded on 4 bits. * * On P9: * - templates 0 -> 3 are supported * - templates 0, 1 and 3 have a 0 receiving rate * - template 2 has receiving rate of 1 (extra cycle) */ memset(rate_buf, 0, rate_buf_size); set_templ_rate(2, 1, rate_buf); *cap = PNV_OCXL_TL_P9_RECV_CAP; return 0; } EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap); int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, uint64_t rate_buf_phys, int rate_buf_size) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; int rc; if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) return -EINVAL; rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap, rate_buf_phys, rate_buf_size); if (rc) { dev_err(&dev->dev, "Can't configure host TL: %d\n", rc); return -EINVAL; } return 0; } EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf); int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq) { int rc; rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq); if (rc) { dev_err(&dev->dev, "Can't get translation interrupt for device\n"); return rc; } return 0; } EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq); void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, void __iomem *tfc, void __iomem *pe_handle) { iounmap(dsisr); iounmap(dar); iounmap(tfc); iounmap(pe_handle); } EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs); int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, void __iomem **dar, void __iomem **tfc, void __iomem **pe_handle) { u64 reg; int i, j, rc = 0; void __iomem *regs[4]; /* * opal stores the mmio addresses of the DSISR, DAR, TFC and * PE_HANDLE registers in a device tree property, in that * order */ for (i = 0; i < 4; i++) { rc = of_property_read_u64_index(dev->dev.of_node, "ibm,opal-xsl-mmio", i, ®); if (rc) break; regs[i] = ioremap(reg, 8); if (!regs[i]) { rc = -EINVAL; break; } } if (rc) { dev_err(&dev->dev, "Can't map translation mmio registers\n"); for (j = i - 1; j >= 0; j--) iounmap(regs[j]); } else { *dsisr = regs[0]; *dar = regs[1]; *tfc = regs[2]; *pe_handle = regs[3]; } return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs); struct spa_data { u64 phb_opal_id; u32 bdfn; }; int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **platform_data) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; struct spa_data *data; u32 bdfn; int rc; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; bdfn = pci_dev_id(dev); rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem), PE_mask); if (rc) { dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc); kfree(data); return rc; } data->phb_opal_id = phb->opal_id; data->bdfn = bdfn; *platform_data = (void *) data; return 0; } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup); void pnv_ocxl_spa_release(void *platform_data) { struct spa_data *data = (struct spa_data *) platform_data; int rc; rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0); WARN_ON(rc); kfree(data); } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release); int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) { struct spa_data *data = (struct spa_data *) platform_data; return opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle); } EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, uint64_t lpcr, void __iomem **arva) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; u64 mmio_atsd; int rc; /* ATSD physical address. * ATSD LAUNCH register: write access initiates a shoot down to * initiate the TLB Invalidate command. */ rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 0, &mmio_atsd); if (rc) { dev_info(&dev->dev, "No available ATSD found\n"); return rc; } /* Assign a register set to a Logical Partition and MMIO ATSD * LPARID register to the required value. */ rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev), lparid, lpcr); if (rc) { dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc); return rc; } *arva = ioremap(mmio_atsd, 24); if (!(*arva)) { dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd); rc = -ENOMEM; } return rc; } EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar); void pnv_ocxl_unmap_lpar(void __iomem *arva) { iounmap(arva); } EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar); void pnv_ocxl_tlb_invalidate(void __iomem *arva, unsigned long pid, unsigned long addr, unsigned long page_size) { unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT); u64 val = 0ull; int pend; u8 size; if (!(arva)) return; if (addr) { /* load Abbreviated Virtual Address register with * the necessary value */ val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51)); out_be64(arva + PNV_OCXL_ATSD_AVA, val); } /* Write access initiates a shoot down to initiate the * TLB Invalidate command */ val = PNV_OCXL_ATSD_LNCH_R; val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10); if (addr) val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00); else { val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01); val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON; } val |= PNV_OCXL_ATSD_LNCH_PRS; /* Actual Page Size to be invalidated * 000 4KB * 101 64KB * 001 2MB * 010 1GB */ size = 0b101; if (page_size == 0x1000) size = 0b000; if (page_size == 0x200000) size = 0b001; if (page_size == 0x40000000) size = 0b010; val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size); val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid); out_be64(arva + PNV_OCXL_ATSD_LNCH, val); /* Poll the ATSD status register to determine when the * TLB Invalidate has been completed. */ val = in_be64(arva + PNV_OCXL_ATSD_STAT); pend = val >> 63; while (pend) { if (time_after_eq(jiffies, timeout)) { pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n", __func__, val, pid); return; } cpu_relax(); val = in_be64(arva + PNV_OCXL_ATSD_STAT); pend = val >> 63; } } EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);