175406Sache// SPDX-License-Identifier: GPL-2.0-or-later 275406Sache/* 375406Sache * Copyright 2014 IBM Corp. 475406Sache */ 575406Sache 675406Sache#include <linux/spinlock.h> 775406Sache#include <linux/kernel.h> 875406Sache#include <linux/module.h> 9136644Sache#include <linux/device.h> 1075406Sache#include <linux/mutex.h> 11136644Sache#include <linux/init.h> 1275406Sache#include <linux/list.h> 1375406Sache#include <linux/mm.h> 1475406Sache#include <linux/of.h> 1575406Sache#include <linux/slab.h> 1675406Sache#include <linux/idr.h> 1775406Sache#include <linux/pci.h> 1875406Sache#include <linux/platform_device.h> 1975406Sache#include <linux/sched/task.h> 2075406Sache 2175406Sache#include <asm/cputable.h> 2275406Sache#include <asm/mmu.h> 2375406Sache#include <misc/cxl-base.h> 2475406Sache 2575406Sache#include "cxl.h" 2675406Sache#include "trace.h" 2775406Sache 2875406Sachestatic DEFINE_SPINLOCK(adapter_idr_lock); 2975406Sachestatic DEFINE_IDR(cxl_adapter_idr); 3075406Sache 3175406Sacheuint cxl_verbose; 3275406Sachemodule_param_named(verbose, cxl_verbose, uint, 0600); 3375406SacheMODULE_PARM_DESC(verbose, "Enable verbose dmesg output"); 3475406Sache 3575406Sacheconst struct cxl_backend_ops *cxl_ops; 3675406Sache 3775406Sacheint cxl_afu_slbia(struct cxl_afu *afu) 3875406Sache{ 3975406Sache unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); 4075406Sache 4175406Sache pr_devel("cxl_afu_slbia issuing SLBIA command\n"); 4275406Sache cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL); 43119610Sache while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) { 44119610Sache if (time_after_eq(jiffies, timeout)) { 4575406Sache dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); 4675406Sache return -EBUSY; 4775406Sache } 4875406Sache /* If the adapter has gone down, we can assume that we 4975406Sache * will PERST it and that will invalidate everything. 5075406Sache */ 5175406Sache if (!cxl_ops->link_ok(afu->adapter, afu)) 5275406Sache return -EIO; 5375406Sache cpu_relax(); 5475406Sache } 5575406Sache return 0; 5675406Sache} 5775406Sache 5875406Sachestatic inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm) 5975406Sache{ 6075406Sache unsigned long flags; 6175406Sache 6275406Sache if (ctx->mm != mm) 6375406Sache return; 6475406Sache 6575406Sache pr_devel("%s matched mm - card: %i afu: %i pe: %i\n", __func__, 6675406Sache ctx->afu->adapter->adapter_num, ctx->afu->slice, ctx->pe); 6775406Sache 6875406Sache spin_lock_irqsave(&ctx->sste_lock, flags); 6975406Sache trace_cxl_slbia(ctx); 7075406Sache memset(ctx->sstp, 0, ctx->sst_size); 7175406Sache spin_unlock_irqrestore(&ctx->sste_lock, flags); 7275406Sache mb(); 7375406Sache cxl_afu_slbia(ctx->afu); 7475406Sache} 7575406Sache 7675406Sachestatic inline void cxl_slbia_core(struct mm_struct *mm) 7775406Sache{ 7875406Sache struct cxl *adapter; 7975406Sache struct cxl_afu *afu; 8075406Sache struct cxl_context *ctx; 8175406Sache int card, slice, id; 8275406Sache 8375406Sache pr_devel("%s called\n", __func__); 8475406Sache 8575406Sache spin_lock(&adapter_idr_lock); 8675406Sache idr_for_each_entry(&cxl_adapter_idr, adapter, card) { 8775406Sache /* XXX: Make this lookup faster with link from mm to ctx */ 8875406Sache spin_lock(&adapter->afu_list_lock); 8975406Sache for (slice = 0; slice < adapter->slices; slice++) { 9075406Sache afu = adapter->afu[slice]; 9175406Sache if (!afu || !afu->enabled) 9275406Sache continue; 9375406Sache rcu_read_lock(); 9475406Sache idr_for_each_entry(&afu->contexts_idr, ctx, id) 9575406Sache _cxl_slbia(ctx, mm); 9675406Sache rcu_read_unlock(); 9775406Sache } 9875406Sache spin_unlock(&adapter->afu_list_lock); 9975406Sache } 10075406Sache spin_unlock(&adapter_idr_lock); 10175406Sache} 10275406Sache 10375406Sachestatic struct cxl_calls cxl_calls = { 10475406Sache .cxl_slbia = cxl_slbia_core, 10575406Sache .owner = THIS_MODULE, 10675406Sache}; 10775406Sache 10875406Sacheint cxl_alloc_sst(struct cxl_context *ctx) 10975406Sache{ 11075406Sache unsigned long vsid; 11175406Sache u64 ea_mask, size, sstp0, sstp1; 11275406Sache 11375406Sache sstp0 = 0; 11475406Sache sstp1 = 0; 11575406Sache 11675406Sache ctx->sst_size = PAGE_SIZE; 11775406Sache ctx->sst_lru = 0; 11875406Sache ctx->sstp = (struct cxl_sste *)get_zeroed_page(GFP_KERNEL); 11975406Sache if (!ctx->sstp) { 12075406Sache pr_err("cxl_alloc_sst: Unable to allocate segment table\n"); 12175406Sache return -ENOMEM; 12275406Sache } 12375406Sache pr_devel("SSTP allocated at 0x%p\n", ctx->sstp); 12475406Sache 12575406Sache vsid = get_kernel_vsid((u64)ctx->sstp, mmu_kernel_ssize) << 12; 12675406Sache 12775406Sache sstp0 |= (u64)mmu_kernel_ssize << CXL_SSTP0_An_B_SHIFT; 12875406Sache sstp0 |= (SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp) << 50; 12975406Sache 13075406Sache size = (((u64)ctx->sst_size >> 8) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT; 13175406Sache if (unlikely(size & ~CXL_SSTP0_An_SegTableSize_MASK)) { 13275406Sache WARN(1, "Impossible segment table size\n"); 13375406Sache return -EINVAL; 13475406Sache } 13575406Sache sstp0 |= size; 13675406Sache 13775406Sache if (mmu_kernel_ssize == MMU_SEGSIZE_256M) 13875406Sache ea_mask = 0xfffff00ULL; 13975406Sache else 14075406Sache ea_mask = 0xffffffff00ULL; 14175406Sache 14275406Sache sstp0 |= vsid >> (50-14); /* Top 14 bits of VSID */ 14375406Sache sstp1 |= (vsid << (64-(50-14))) & ~ea_mask; 14475406Sache sstp1 |= (u64)ctx->sstp & ea_mask; 14575406Sache sstp1 |= CXL_SSTP1_An_V; 14675406Sache 14775406Sache pr_devel("Looked up %#llx: slbfee. %#llx (ssize: %x, vsid: %#lx), copied to SSTP0: %#llx, SSTP1: %#llx\n", 14875406Sache (u64)ctx->sstp, (u64)ctx->sstp & ESID_MASK, mmu_kernel_ssize, vsid, sstp0, sstp1); 14975406Sache 15075406Sache /* Store calculated sstp hardware points for use later */ 15175406Sache ctx->sstp0 = sstp0; 15275406Sache ctx->sstp1 = sstp1; 15375406Sache 15475406Sache return 0; 15575406Sache} 15675406Sache 15775406Sache/* print buffer content as integers when debugging */ 15875406Sachevoid cxl_dump_debug_buffer(void *buf, size_t buf_len) 15975406Sache{ 16075406Sache#ifdef DEBUG 16175406Sache int i, *ptr; 16275406Sache 16375406Sache /* 16475406Sache * We want to regroup up to 4 integers per line, which means they 16575406Sache * need to be in the same pr_devel() statement 16675406Sache */ 16775406Sache ptr = (int *) buf; 16875406Sache for (i = 0; i * 4 < buf_len; i += 4) { 16975406Sache if ((i + 3) * 4 < buf_len) 17075406Sache pr_devel("%.8x %.8x %.8x %.8x\n", ptr[i], ptr[i + 1], 17175406Sache ptr[i + 2], ptr[i + 3]); 17275406Sache else if ((i + 2) * 4 < buf_len) 17375406Sache pr_devel("%.8x %.8x %.8x\n", ptr[i], ptr[i + 1], 17475406Sache ptr[i + 2]); 17575406Sache else if ((i + 1) * 4 < buf_len) 17675406Sache pr_devel("%.8x %.8x\n", ptr[i], ptr[i + 1]); 17775406Sache else 17875406Sache pr_devel("%.8x\n", ptr[i]); 17975406Sache } 18075406Sache#endif /* DEBUG */ 18175406Sache} 18275406Sache 18375406Sache/* Find a CXL adapter by it's number and increase it's refcount */ 18475406Sachestruct cxl *get_cxl_adapter(int num) 18575406Sache{ 18675406Sache struct cxl *adapter; 18775406Sache 18875406Sache spin_lock(&adapter_idr_lock); 18975406Sache if ((adapter = idr_find(&cxl_adapter_idr, num))) 19075406Sache get_device(&adapter->dev); 19175406Sache spin_unlock(&adapter_idr_lock); 19275406Sache 19375406Sache return adapter; 19475406Sache} 19575406Sache 19675406Sachestatic int cxl_alloc_adapter_nr(struct cxl *adapter) 19775406Sache{ 19875406Sache int i; 19975406Sache 20075406Sache idr_preload(GFP_KERNEL); 20175406Sache spin_lock(&adapter_idr_lock); 20275406Sache i = idr_alloc(&cxl_adapter_idr, adapter, 0, 0, GFP_NOWAIT); 20375406Sache spin_unlock(&adapter_idr_lock); 20475406Sache idr_preload_end(); 20575406Sache if (i < 0) 20675406Sache return i; 20775406Sache 20875406Sache adapter->adapter_num = i; 20975406Sache 21075406Sache return 0; 21175406Sache} 21275406Sache 21375406Sachevoid cxl_remove_adapter_nr(struct cxl *adapter) 21475406Sache{ 21575406Sache idr_remove(&cxl_adapter_idr, adapter->adapter_num); 21675406Sache} 21775406Sache 21875406Sachestruct cxl *cxl_alloc_adapter(void) 21975406Sache{ 22075406Sache struct cxl *adapter; 22175406Sache 22275406Sache if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) 22375406Sache return NULL; 22475406Sache 22575406Sache spin_lock_init(&adapter->afu_list_lock); 22675406Sache 22775406Sache if (cxl_alloc_adapter_nr(adapter)) 22875406Sache goto err1; 22975406Sache 23075406Sache if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) 23175406Sache goto err2; 23275406Sache 23375406Sache /* start with context lock taken */ 23475406Sache atomic_set(&adapter->contexts_num, -1); 23575406Sache 23675406Sache return adapter; 23775406Sacheerr2: 23875406Sache cxl_remove_adapter_nr(adapter); 23975406Sacheerr1: 24075406Sache kfree(adapter); 24175406Sache return NULL; 24275406Sache} 24375406Sache 24475406Sachestruct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) 24575406Sache{ 24675406Sache struct cxl_afu *afu; 24775406Sache 24875406Sache if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL))) 24975406Sache return NULL; 25075406Sache 25175406Sache afu->adapter = adapter; 252136644Sache afu->dev.parent = &adapter->dev; 253136644Sache afu->dev.release = cxl_ops->release_afu; 254136644Sache afu->slice = slice; 255136644Sache idr_init(&afu->contexts_idr); 25675406Sache mutex_init(&afu->contexts_lock); 25775406Sache spin_lock_init(&afu->afu_cntl_lock); 25875406Sache atomic_set(&afu->configured_state, -1); 25975406Sache afu->prefault_mode = CXL_PREFAULT_NONE; 26075406Sache afu->irqs_max = afu->adapter->user_irqs; 26175406Sache 26275406Sache return afu; 26375406Sache} 26475406Sache 26575406Sacheint cxl_afu_select_best_mode(struct cxl_afu *afu) 26675406Sache{ 26775406Sache if (afu->modes_supported & CXL_MODE_DIRECTED) 26875406Sache return cxl_ops->afu_activate_mode(afu, CXL_MODE_DIRECTED); 26975406Sache 27075406Sache if (afu->modes_supported & CXL_MODE_DEDICATED) 27175406Sache return cxl_ops->afu_activate_mode(afu, CXL_MODE_DEDICATED); 27275406Sache 27375406Sache dev_warn(&afu->dev, "No supported programming modes available\n"); 27475406Sache /* We don't fail this so the user can inspect sysfs */ 27575406Sache return 0; 27675406Sache} 27775406Sache 27875406Sacheint cxl_adapter_context_get(struct cxl *adapter) 27975406Sache{ 28075406Sache int rc; 28175406Sache 28275406Sache rc = atomic_inc_unless_negative(&adapter->contexts_num); 28375406Sache return rc ? 0 : -EBUSY; 28475406Sache} 28575406Sache 28675406Sachevoid cxl_adapter_context_put(struct cxl *adapter) 28775406Sache{ 28875406Sache atomic_dec_if_positive(&adapter->contexts_num); 28975406Sache} 29075406Sache 29175406Sacheint cxl_adapter_context_lock(struct cxl *adapter) 29275406Sache{ 29375406Sache int rc; 29475406Sache /* no active contexts -> contexts_num == 0 */ 29575406Sache rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1); 29675406Sache return rc ? -EBUSY : 0; 29775406Sache} 29875406Sache 299136644Sachevoid cxl_adapter_context_unlock(struct cxl *adapter) 30075406Sache{ 30175406Sache int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0); 30275406Sache 30375406Sache /* 30475406Sache * contexts lock taken -> contexts_num == -1 30575406Sache * If not true then show a warning and force reset the lock. 30675406Sache * This will happen when context_unlock was requested without 30775406Sache * doing a context_lock. 30875406Sache */ 30975406Sache if (val != -1) { 31075406Sache atomic_set(&adapter->contexts_num, 0); 31175406Sache WARN(1, "Adapter context unlocked with %d active contexts", 31275406Sache val); 31375406Sache } 31475406Sache} 31575406Sache 31675406Sachestatic int __init init_cxl(void) 31775406Sache{ 31875406Sache int rc = 0; 31975406Sache 32075406Sache if (!tlbie_capable) 32175406Sache return -EINVAL; 32275406Sache 32375406Sache if ((rc = cxl_file_init())) 32475406Sache return rc; 32575406Sache 32675406Sache cxl_debugfs_init(); 32775406Sache 32875406Sache /* 32975406Sache * we don't register the callback on P9. slb callack is only 33075406Sache * used for the PSL8 MMU and CX4. 33175406Sache */ 33275406Sache if (cxl_is_power8()) { 33375406Sache rc = register_cxl_calls(&cxl_calls); 33475406Sache if (rc) 33575406Sache goto err; 33675406Sache } 33775406Sache 33875406Sache if (cpu_has_feature(CPU_FTR_HVMODE)) { 33975406Sache cxl_ops = &cxl_native_ops; 34075406Sache rc = pci_register_driver(&cxl_pci_driver); 34175406Sache } 34275406Sache#ifdef CONFIG_PPC_PSERIES 34375406Sache else { 34475406Sache cxl_ops = &cxl_guest_ops; 34575406Sache rc = platform_driver_register(&cxl_of_driver); 34675406Sache } 34775406Sache#endif 34875406Sache if (rc) 34975406Sache goto err1; 35075406Sache 35175406Sache return 0; 35275406Sacheerr1: 353136644Sache if (cxl_is_power8()) 354136644Sache unregister_cxl_calls(&cxl_calls); 355136644Sacheerr: 356136644Sache cxl_debugfs_exit(); 35775406Sache cxl_file_exit(); 35875406Sache 35975406Sache return rc; 36075406Sache} 36175406Sache 362136644Sachestatic void exit_cxl(void) 363136644Sache{ 364136644Sache if (cpu_has_feature(CPU_FTR_HVMODE)) 365136644Sache pci_unregister_driver(&cxl_pci_driver); 366136644Sache#ifdef CONFIG_PPC_PSERIES 36775406Sache else 36875406Sache platform_driver_unregister(&cxl_of_driver); 369136644Sache#endif 370136644Sache 37175406Sache cxl_debugfs_exit(); 37275406Sache cxl_file_exit(); 37375406Sache if (cxl_is_power8()) 37475406Sache unregister_cxl_calls(&cxl_calls); 37575406Sache idr_destroy(&cxl_adapter_idr); 37675406Sache} 37775406Sache 37875406Sachemodule_init(init_cxl); 37975406Sachemodule_exit(exit_cxl); 380119610Sache 381119610SacheMODULE_DESCRIPTION("IBM Coherent Accelerator"); 38275406SacheMODULE_AUTHOR("Ian Munsie <imunsie@au1.ibm.com>"); 38375406SacheMODULE_LICENSE("GPL"); 38475406Sache