1/* 2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34/* 35 * This file is conditionally built on x86_64 only. Otherwise weak symbol 36 * versions of the functions exported from here are used. 37 */ 38 39#include <linux/pci.h> 40#include <asm/mtrr.h> 41#include <asm/processor.h> 42 43#include "ipath_kernel.h" 44 45/** 46 * ipath_enable_wc - enable write combining for MMIO writes to the device 47 * @dd: infinipath device 48 * 49 * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable 50 * write combining. 51 */ 52int ipath_enable_wc(struct ipath_devdata *dd) 53{ 54 int ret = 0; 55 u64 pioaddr, piolen; 56 unsigned bits; 57 const unsigned long addr = pci_resource_start(dd->pcidev, 0); 58 const size_t len = pci_resource_len(dd->pcidev, 0); 59 60 /* 61 * Set the PIO buffers to be WCCOMB, so we get HT bursts to the 62 * chip. Linux (possibly the hardware) requires it to be on a power 63 * of 2 address matching the length (which has to be a power of 2). 64 * For rev1, that means the base address, for rev2, it will be just 65 * the PIO buffers themselves. 66 * For chips with two sets of buffers, the calculations are 67 * somewhat more complicated; we need to sum, and the piobufbase 68 * register has both offsets, 2K in low 32 bits, 4K in high 32 bits. 69 * The buffers are still packed, so a single range covers both. 70 */ 71 if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */ 72 unsigned long pio2kbase, pio4kbase; 73 pio2kbase = dd->ipath_piobufbase & 0xffffffffUL; 74 pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL; 75 if (pio2kbase < pio4kbase) { /* all, for now */ 76 pioaddr = addr + pio2kbase; 77 piolen = pio4kbase - pio2kbase + 78 dd->ipath_piobcnt4k * dd->ipath_4kalign; 79 } else { 80 pioaddr = addr + pio4kbase; 81 piolen = pio2kbase - pio4kbase + 82 dd->ipath_piobcnt2k * dd->ipath_palign; 83 } 84 } else { /* single buffer size (2K, currently) */ 85 pioaddr = addr + dd->ipath_piobufbase; 86 piolen = dd->ipath_piobcnt2k * dd->ipath_palign + 87 dd->ipath_piobcnt4k * dd->ipath_4kalign; 88 } 89 90 for (bits = 0; !(piolen & (1ULL << bits)); bits++) 91 /* do nothing */ ; 92 93 if (piolen != (1ULL << bits)) { 94 piolen >>= bits; 95 while (piolen >>= 1) 96 bits++; 97 piolen = 1ULL << (bits + 1); 98 } 99 if (pioaddr & (piolen - 1)) { 100 u64 atmp; 101 ipath_dbg("pioaddr %llx not on right boundary for size " 102 "%llx, fixing\n", 103 (unsigned long long) pioaddr, 104 (unsigned long long) piolen); 105 atmp = pioaddr & ~(piolen - 1); 106 if (atmp < addr || (atmp + piolen) > (addr + len)) { 107 ipath_dev_err(dd, "No way to align address/size " 108 "(%llx/%llx), no WC mtrr\n", 109 (unsigned long long) atmp, 110 (unsigned long long) piolen << 1); 111 ret = -ENODEV; 112 } else { 113 ipath_dbg("changing WC base from %llx to %llx, " 114 "len from %llx to %llx\n", 115 (unsigned long long) pioaddr, 116 (unsigned long long) atmp, 117 (unsigned long long) piolen, 118 (unsigned long long) piolen << 1); 119 pioaddr = atmp; 120 piolen <<= 1; 121 } 122 } 123 124 if (!ret) { 125 int cookie; 126 ipath_cdbg(VERBOSE, "Setting mtrr for chip to WC " 127 "(addr %llx, len=0x%llx)\n", 128 (unsigned long long) pioaddr, 129 (unsigned long long) piolen); 130 cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); 131 if (cookie < 0) { 132 { 133 dev_info(&dd->pcidev->dev, 134 "mtrr_add() WC for PIO bufs " 135 "failed (%d)\n", 136 cookie); 137 ret = -EINVAL; 138 } 139 } else { 140 ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, " 141 "cookie is %d\n", cookie); 142 dd->ipath_wc_cookie = cookie; 143 dd->ipath_wc_base = (unsigned long) pioaddr; 144 dd->ipath_wc_len = (unsigned long) piolen; 145 } 146 } 147 148 return ret; 149} 150 151/** 152 * ipath_disable_wc - disable write combining for MMIO writes to the device 153 * @dd: infinipath device 154 */ 155void ipath_disable_wc(struct ipath_devdata *dd) 156{ 157 if (dd->ipath_wc_cookie) { 158 int r; 159 ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n"); 160 r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base, 161 dd->ipath_wc_len); 162 if (r < 0) 163 dev_info(&dd->pcidev->dev, 164 "mtrr_del(%lx, %lx, %lx) failed: %d\n", 165 dd->ipath_wc_cookie, dd->ipath_wc_base, 166 dd->ipath_wc_len, r); 167 dd->ipath_wc_cookie = 0; /* even on failure */ 168 } 169} 170 171/** 172 * ipath_unordered_wc - indicate whether write combining is ordered 173 * 174 * Because our performance depends on our ability to do write combining mmio 175 * writes in the most efficient way, we need to know if we are on an Intel 176 * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in 177 * the order completed, and so no special flushing is required to get 178 * correct ordering. Intel processors, however, will flush write buffers 179 * out in "random" orders, and so explicit ordering is needed at times. 180 */ 181int ipath_unordered_wc(void) 182{ 183 return boot_cpu_data.x86_vendor != X86_VENDOR_AMD; 184} 185