/**
 * \file
 * \brief PCI driver
 *
 * This file walks through the PCI bus, enumerates each device and gathers
 * information about each device.
 */

/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011, ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
 */

/* NOTE(review): the angle-bracket system/library header names below were lost
 * during file extraction (the <...> part was stripped); the bare #include
 * lines must be restored from the original source tree before this compiles. */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "pci.h"
#include "ht_config.h"
#include
#include "pci_debug.h"
#include
#include // Just for debug print

// Value written to a BAR register to probe its size (all address bits set)
#define BAR_PROBE       0xffffffff

#define PAGE_BITS BASE_PAGE_BITS

// Bookkeeping for one capability handed out for a device BAR (or I/O range)
struct device_caps {
    struct capref phys_cap;   // physical-address-range cap obtained via ACPI/mm
    struct capref frame_cap;  // device frame cap derived from phys_cap
    uint8_t bar_nr;           // BAR number this entry describes
    uint8_t bits;             // log2 of the allocated frame size
    bool assigned;            //false => this entry is not in use
    uint8_t type;             // 0 = memory BAR, 1 = I/O (see assign_complete_io_range)
};

// One slot per (bus, device, function, BAR index)
struct device_caps dev_caps[PCI_NBUSES][PCI_NDEVICES][PCI_NFUNCTIONS][PCI_NBARS];

//const char *skb_bridge_program = "bridge_page";
const char *skb_bridge_program = "bridge_bios";
bool decoding_net = false;
uint16_t max_numvfs = 255;  // upper bound on SR-IOV virtual functions to enable
bool enable_vfs = false;    // SR-IOV VFs are only brought up when this is set

static void query_bars(pci_hdr0_t devhdr, struct pci_address addr,
                       bool pci2pci_bridge);
static void enable_busmaster(uint8_t bus, uint8_t dev, uint8_t fun, bool pcie);

/**
 * \brief Compute the mapping size of a 32-bit memory BAR from its probed value.
 *
 * After BAR_PROBE has been written, the position of the lowest set bit in the
 * base field encodes the region size.  The base field is the raw register
 * shifted right by 7, hence the result is shifted back (mask << 7).
 *
 * \return mapping size in bytes, or 0 if the BAR is not implemented.
 */
static uint32_t bar_mapping_size(pci_hdr0_bar32_t bar)
{
    if (bar.base == 0) {
        return 0;
    }

    for (uint32_t mask = 1;; mask <<= 1) {
        assert(mask != 0); // a set bit must be found before the mask wraps
        if (bar.base & mask) {
            return mask << 7;
        }
    }
}

/**
 * \brief Compute the mapping size of a 64-bit BAR pair.
 *
 * \param base Raw (unshifted) probed content of the combined 64-bit register.
 *
 * \return lowest set bit, i.e. the region size, or 0 if not implemented.
 */
static pciaddr_t bar_mapping_size64(uint64_t base)
{
    if (base == 0) {
        return 0;
    }
    for (pciaddr_t mask = 1;; mask <<= 1) {
        assert(mask != 0);
        if (base & mask) {
            /*
             * Note: we get the actual raw register content here and not
             * the bar.base value so no shift.
             * - 2014-05-03, RA
             */
            return mask;
        }
    }
}

/// Reset all BAR capability bookkeeping to "unassigned".
void pci_init_datastructures(void)
{
    memset(dev_caps, 0, sizeof(dev_caps));
}

/**
 * \brief Find the dev_caps index holding the cap for a given BAR number.
 *
 * Scans only the leading run of assigned entries for (bus,dev,fun).
 *
 * \return index whose bar_nr matches BAR, or -1 if none matches.
 */
int pci_bar_to_caps_index(uint8_t bus, uint8_t dev, uint8_t fun, uint8_t BAR)
{
    uint8_t i;
    for (i = 0; i < PCI_NBARS && dev_caps[bus][dev][fun][i].assigned; i++) {
        if (dev_caps[bus][dev][fun][i].bar_nr == BAR) {
            return i;
        }
    }
    return -1;
}

/// Return the BAR number stored at a given dev_caps index.
int pci_get_bar_nr_for_index(uint8_t bus, uint8_t dev, uint8_t fun, uint8_t idx)
{
    return (dev_caps[bus][dev][fun][idx].bar_nr);
}

/// Return the frame capability stored at a given dev_caps index.
struct capref pci_get_bar_cap_for_device(uint8_t bus, uint8_t dev, uint8_t fun,
                                         uint8_t idx )
{
    return (dev_caps[bus][dev][fun][idx].frame_cap);
}

/// Return the cap type (0 = memory, 1 = I/O) stored at a given dev_caps index.
uint8_t pci_get_bar_cap_type_for_device(uint8_t bus, uint8_t dev, uint8_t fun,
                                        uint8_t idx)
{
    return (dev_caps[bus][dev][fun][idx].type);
}

/**
 * \brief Allocate a physical range cap for one BAR and derive a device frame.
 *
 * Rounds the BAR size up to a page, asks ACPI's memory manager (via RPC) for
 * a cap covering [base, base + framesize), retypes it into a device frame and
 * records the result in dev_caps[bus][dev][fun][idx].
 *
 * \return SYS_ERR_OK on success, or the error from the mm RPC / retype.
 */
static errval_t alloc_device_bar(uint8_t idx, uint8_t bus, uint8_t dev,
                                 uint8_t fun, uint8_t BAR, pciaddr_t base,
                                 pciaddr_t high, pcisize_t size)
{
    struct acpi_binding* acl = get_acpi_binding();

    struct device_caps *c = &dev_caps[bus][dev][fun][idx];
    errval_t err;

    size = ROUND_UP(size, BASE_PAGE_SIZE); // Some BARs are less than 4 KiB
    uint8_t bits = log2ceil(size);
    pcisize_t framesize = 1UL << bits;

    PCI_DEBUG("getting cap for BAR of size %"PRIuPCISIZE"\n", size);

    errval_t error_code;
    err = slot_alloc(&c->phys_cap);
    assert(err_is_ok(err));
    err = acl->rpc_tx_vtbl.mm_alloc_range_proxy(acl, bits, base,
                                                base + framesize,
                                                &c->phys_cap, &error_code);
    assert(err_is_ok(err)); // RPC transport failure is fatal here
    err = error_code;       // the interesting error is the remote one
    if (err_is_fail(err)) {
        PCI_DEBUG("mm_alloc_range() failed: bits = %hhu, base = %"PRIxPCIADDR","
                  " end = %"PRIxPCIADDR"\n", bits, base, base + framesize);
        return err;
    }

    err = devframe_type(&c->frame_cap, c->phys_cap, bits);
    if (err_is_fail(err)) {
        PCI_DEBUG("devframe_type() failed: bits = %hhu, base = %"PRIxPCIADDR
                  ", doba = %"PRIxPCIADDR"\n", bits, base, base + (1UL << bits));
        return err;
    }

    c->bits = bits;
    c->bar_nr = BAR;
    c->assigned = true;
    c->type = 0; // memory BAR

    return SYS_ERR_OK;
}

//XXX: FIXME: HACK: BAD!!!
// Only needed to allocate a full I/O range cap to
// the VESA graphics driver
static errval_t assign_complete_io_range(uint8_t idx, uint8_t bus, uint8_t dev,
                                         uint8_t fun, uint8_t BAR)
{
    // Hand out a copy of the full 16-bit I/O space cap instead of a real BAR cap
    errval_t err = slot_alloc(&(dev_caps[bus][dev][fun][idx].frame_cap));
    assert(err_is_ok(err));
    err = cap_copy(dev_caps[bus][dev][fun][idx].frame_cap, cap_io);
    assert(err_is_ok(err));

    dev_caps[bus][dev][fun][idx].bits = 16; // 2^16 I/O ports
    dev_caps[bus][dev][fun][idx].bar_nr = BAR;
    dev_caps[bus][dev][fun][idx].assigned = true;
    dev_caps[bus][dev][fun][idx].type = 1;  // I/O, not memory
    return SYS_ERR_OK;
}

/**
 * \brief Look up a device in the SKB and allocate caps for its BARs.
 *
 * Any argument may be PCI_DONT_CARE; the SKB unifies the wildcards.  On
 * success *bus/*dev/*fun hold the concrete address, *pcie tells whether the
 * device is PCI Express, and *nr_allocated_bars caps have been recorded in
 * dev_caps.  Also enables bus mastering for the device.
 *
 * \return SYS_ERR_OK, or PCI_ERR_DEVICE_INIT pushed on the underlying error.
 */
errval_t device_init(uint32_t class_code, uint32_t sub_class, uint32_t prog_if,
                     uint32_t vendor_id, uint32_t device_id, uint32_t *bus,
                     uint32_t *dev, uint32_t *fun, bool *pcie,
                     int *nr_allocated_bars)
{
    *nr_allocated_bars = 0;

    errval_t err;
    char s_bus[10], s_dev[10], s_fun[10], s_vendor_id[10], s_device_id[10];
    char s_class_code[10], s_sub_class[10], s_prog_if[10];
    char s_pcie[5]; // holds "pcie"/"pci" read back via %[a-z]
    int error_code;
    int bar_nr;
    pciaddr_t bar_base, bar_high;
    pcisize_t bar_size;

    // Render each argument either as a number or as a Prolog variable
    // (capitalized name) so the SKB can unify the don't-care fields.
    if (*bus != PCI_DONT_CARE) {
        snprintf(s_bus, sizeof(s_bus), "%"PRIu32"", *bus);
    } else {
        strncpy(s_bus, "Bus", sizeof(s_bus));
    }
    if (*dev != PCI_DONT_CARE) {
        snprintf(s_dev, sizeof(s_dev), "%"PRIu32, *dev);
    } else {
        strncpy(s_dev, "Dev", sizeof(s_dev));
    }
    if (*fun != PCI_DONT_CARE) {
        snprintf(s_fun, sizeof(s_fun), "%"PRIu32, *fun);
    } else {
        strncpy(s_fun, "Fun", sizeof(s_fun));
    }
    if (vendor_id != PCI_DONT_CARE) {
        snprintf(s_vendor_id, sizeof(s_vendor_id), "%"PRIu32, vendor_id);
    } else {
        strncpy(s_vendor_id, "Ven", sizeof(s_vendor_id));
    }
    if (device_id != PCI_DONT_CARE) {
        snprintf(s_device_id, sizeof(s_device_id), "%"PRIu32, device_id);
    } else {
        strncpy(s_device_id, "DevID", sizeof(s_device_id));
    }
    if (class_code != PCI_DONT_CARE) {
        snprintf(s_class_code, sizeof(s_class_code), "%"PRIu32, class_code);
    } else {
        strncpy(s_class_code, "Cl", sizeof(s_class_code));
    }
    if (sub_class != PCI_DONT_CARE) {
        snprintf(s_sub_class, sizeof(s_sub_class), "%"PRIu32, sub_class);
    } else {
        strncpy(s_sub_class, "Sub", sizeof(s_sub_class));
    }
    if (prog_if != PCI_DONT_CARE) {
        snprintf(s_prog_if, sizeof(s_prog_if), "%"PRIu32, prog_if);
    } else {
        strncpy(s_prog_if, "ProgIf", sizeof(s_prog_if));
    }

    PCI_DEBUG("device_init(): Searching device %s, %s, %s, %s, %s, %s, %s, %s\n",
              s_bus, s_dev, s_fun, s_vendor_id, s_device_id, s_class_code,
              s_sub_class, s_prog_if);

    //find the device: Unify all values
    error_code = skb_execute_query(
        "device(PCIE,addr(%s, %s, %s), %s, %s, %s, %s, %s, _),"
        "writeln(d(PCIE,%s,%s,%s,%s,%s,%s,%s,%s)).",
        s_bus, s_dev, s_fun, s_vendor_id, s_device_id,
        s_class_code, s_sub_class, s_prog_if,
        s_bus, s_dev, s_fun, s_vendor_id, s_device_id,
        s_class_code, s_sub_class, s_prog_if);
    if (error_code != 0) {
        // NOTE(review): error_code is declared int but is fed to err_push /
        // DEBUG_SKB_ERR which expect errval_t — confirm skb_execute_query's
        // return type and make this errval_t if so.
        DEBUG_SKB_ERR(error_code, "device_init()");
        return err_push(error_code, PCI_ERR_DEVICE_INIT);
    }

    err = skb_read_output("d(%[a-z], %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32
                          ",%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32").",
                          s_pcie, bus, dev, fun, &vendor_id, &device_id,
                          &class_code, &sub_class, &prog_if);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "skb read output\n");
        PCI_DEBUG("device_init(): Could not read the SKB's output for the device\n");
        PCI_DEBUG("device_init(): SKB returned: %s\n", skb_get_output());
        PCI_DEBUG("device_init(): SKB error returned: %s\n", skb_get_error_output());
        return err_push(err, PCI_ERR_DEVICE_INIT);
    }

    if (strncmp(s_pcie, "pcie", strlen("pcie")) == 0) {
        *pcie = true;
    } else {
        *pcie = false;
    }

    PCI_DEBUG("device_init(): Found device at %u:%u:%u\n", *bus, *dev, *fun);

    //get the implemented BARs for the found device
    error_code = skb_execute_query("pci_get_implemented_BAR_addresses(%"PRIu32
                                   ",%"PRIu32",%"PRIu32",%"PRIu32",%"PRIu32",%"
                                   PRIu32",%"PRIu32",%"PRIu32",L),length(L,Len)"
                                   ",writeln(L)",
                                   *bus, *dev, *fun, vendor_id, device_id,
                                   class_code, sub_class, prog_if);
    if (error_code != 0) {
        DEBUG_SKB_ERR(error_code, "device_init()");
        return err_push(error_code, PCI_ERR_DEVICE_INIT);
    }

    struct list_parser_status status;
    skb_read_list_init(&status);

    //iterate over all buselements
    while (skb_read_list(&status, "baraddr(%d, %"PRIuPCIADDR", %"PRIuPCIADDR", "
                         "%"PRIuPCISIZE")",
                         &bar_nr, &bar_base, &bar_high, &bar_size)) {
        // The page-based bridge allocator stores page numbers, not bytes
        if(strncmp("bridge_page", skb_bridge_program, strlen("bridge_page")) == 0){
            bar_base *= BASE_PAGE_SIZE;
            bar_high *= BASE_PAGE_SIZE;
            bar_size *= BASE_PAGE_SIZE;
        }
        err = alloc_device_bar(*nr_allocated_bars, *bus, *dev, *fun, bar_nr,
                               bar_base, bar_high, bar_size);

        PCI_DEBUG("device_init(): BAR %d: base = %"PRIxPCIADDR
                  ", size = %" PRIxPCISIZE"\n", bar_nr, bar_base, bar_size);

        if (err_is_fail(err)) {
            PCI_DEBUG("device_init(): Could not allocate cap for BAR %d\n",
                      bar_nr);
            return err_push(err, PCI_ERR_DEVICE_INIT);
        }
        (*nr_allocated_bars)++;
    }

    //XXX: FIXME: HACK: BAD!!! Only needed to allocate a full I/O range cap to
    //     the VESA graphics driver
    if (class_code == PCI_CLASS_DISPLAY) {
        assert(*nr_allocated_bars < PCI_NBARS);
        err = assign_complete_io_range(*nr_allocated_bars, *bus, *dev, *fun,
                                       5 /*very BAAAD */);
        (*nr_allocated_bars)++;
    }
    //end of badness

    PCI_DEBUG("device_init(): Allocated caps for %d BARs\n", *nr_allocated_bars);

    PCI_DEBUG("enable busmaster for device (%u, %u, %u)...\n", *bus, *dev, *fun);
    enable_busmaster(*bus, *dev, *fun, *pcie);

    return SYS_ERR_OK;
}

/**
 * \brief Re-route and re-enable the interrupt of an already known device.
 *
 * Looks the device up in the SKB (same wildcard scheme as device_init),
 * allocates its IRQ, asks ACPI to route it to (coreid, vector) and clears
 * the interrupt-disable bit in the device's command register.
 */
errval_t device_reregister_interrupt(uint8_t coreid, int vector,
                                     uint32_t class_code, uint32_t sub_class,
                                     uint32_t prog_if, uint32_t vendor_id,
                                     uint32_t device_id, uint32_t *bus,
                                     uint32_t *dev,uint32_t *fun)
{
    errval_t err;
    char s_bus[10], s_dev[10], s_fun[10], s_vendor_id[10], s_device_id[10];
    char s_class_code[10], s_sub_class[10], s_prog_if[10];
    char s_pcie[5];
    bool pcie;
    int error_code;

    // Render arguments as numbers or Prolog variables (see device_init)
    if (*bus != PCI_DONT_CARE) {
        snprintf(s_bus, sizeof(s_bus), "%"PRIu32"", *bus);
    } else {
        strncpy(s_bus, "Bus", sizeof(s_bus));
    }
    if (*dev != PCI_DONT_CARE) {
        snprintf(s_dev, sizeof(s_dev), "%"PRIu32, *dev);
    } else {
        strncpy(s_dev, "Dev", sizeof(s_dev));
    }
    if (*fun != PCI_DONT_CARE) {
        snprintf(s_fun, sizeof(s_fun), "%"PRIu32, *fun);
    } else {
        strncpy(s_fun, "Fun", sizeof(s_fun));
    }
    if (vendor_id != PCI_DONT_CARE) {
        snprintf(s_vendor_id, sizeof(s_vendor_id), "%"PRIu32, vendor_id);
    } else {
        strncpy(s_vendor_id, "Ven", sizeof(s_vendor_id));
    }
    if (device_id != PCI_DONT_CARE) {
        snprintf(s_device_id, sizeof(s_device_id), "%"PRIu32, device_id);
    } else {
        strncpy(s_device_id, "DevID", sizeof(s_device_id));
    }
    if (class_code != PCI_DONT_CARE) {
        snprintf(s_class_code, sizeof(s_class_code), "%"PRIu32, class_code);
    } else {
        strncpy(s_class_code, "Cl", sizeof(s_class_code));
    }
    if (sub_class != PCI_DONT_CARE) {
        snprintf(s_sub_class, sizeof(s_sub_class), "%"PRIu32, sub_class);
    } else {
        strncpy(s_sub_class, "Sub", sizeof(s_sub_class));
    }
    if (prog_if != PCI_DONT_CARE) {
        snprintf(s_prog_if, sizeof(s_prog_if), "%"PRIu32, prog_if);
    } else {
        strncpy(s_prog_if, "ProgIf", sizeof(s_prog_if));
    }

    PCI_DEBUG("device_init(): Searching device %s, %s, %s, %s, %s, %s, %s, %s\n",
              s_bus, s_dev, s_fun, s_vendor_id, s_device_id, s_class_code,
              s_sub_class, s_prog_if);

    //find the device: Unify all values
    error_code = skb_execute_query(
        "device(PCIE,addr(%s, %s, %s), %s, %s, %s, %s, %s, _),"
        "writeln(d(PCIE,%s,%s,%s,%s,%s,%s,%s,%s)).",
        s_bus, s_dev, s_fun, s_vendor_id, s_device_id,
        s_class_code, s_sub_class, s_prog_if,
        s_bus, s_dev, s_fun, s_vendor_id, s_device_id,
        s_class_code, s_sub_class, s_prog_if
    );
    if (error_code != 0) {
        PCI_DEBUG("pci.c: device_init(): SKB returnd error code %s\n",
                  err_getcode(error_code));
        DEBUG_SKB_ERR(error_code, "");
        return PCI_ERR_DEVICE_INIT;
    }

    err = skb_read_output("d(%[a-z], %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32
                          ",%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32").",
                          s_pcie, bus, dev, fun, &vendor_id, &device_id,
                          &class_code, &sub_class, &prog_if);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "skb read output\n");
        PCI_DEBUG("device_init(): Could not read the SKB's output for the device\n");
        PCI_DEBUG("device_init(): SKB returned: %s\n", skb_get_output());
        PCI_DEBUG("device_init(): SKB error returned: %s\n", skb_get_error_output());
        return err_push(err,PCI_ERR_DEVICE_INIT);
    }

    if(strncmp(s_pcie, "pcie", strlen("pcie")) == 0) {
        pcie = true;
    } else {
        pcie = false;
    }

    PCI_DEBUG("device_init(): Found device at %u:%u:%u\n", *bus, *dev, *fun);

    //get the implemented BARs for the found device
    int irq = pci_setup_interrupt(*bus, *dev, *fun);
    PCI_DEBUG("pci: init_device_handler_irq: init interrupt.\n");
    PCI_DEBUG("pci: irq = %u, core = %hhu, vector = %u\n", irq, coreid, vector);
    struct acpi_binding* cl = get_acpi_binding();
    errval_t ret_error;
    err = cl->rpc_tx_vtbl.enable_and_route_interrupt(cl, irq, coreid, vector,
                                                     &ret_error);
    assert(err_is_ok(err));
    assert(err_is_ok(ret_error)); // FIXME
    // printf("IRQ for this device is %d\n", irq);
    //DEBUG_ERR(err, "enable_and_route_interrupt");

    pci_enable_interrupt_for_device(*bus, *dev, *fun, pcie);
    return SYS_ERR_OK;
}

/**
 * \brief Clear the interrupt-disable bit in a device's command register.
 *
 * Switches config-space access mode to PCIe or legacy PCI first, as requested.
 */
void pci_enable_interrupt_for_device(uint32_t bus, uint32_t dev, uint32_t fun,
                                     bool pcie)
{
    struct pci_address addr = {
        .bus = (uint8_t)(bus & 0xff),
        .device = (uint8_t)(dev & 0xff),
        // NOTE(review): this uses modulo (fun % 0xff) where the two fields
        // above use a mask (& 0xff) — almost certainly a typo.  Harmless in
        // practice since function numbers are 0..7, but should be '& 0xff'.
        .function = (uint8_t)(fun % 0xff)
    };

    pci_hdr0_t hdr;
    pci_hdr0_initialize(&hdr, addr);

    if (pcie) {
        pcie_enable();
    } else {
        pcie_disable();
    }

    pci_hdr0_command_t cmd = pci_hdr0_command_rd(&hdr);
    cmd.int_dis = 0; // 0 = interrupts enabled
    pci_hdr0_command_wr(&hdr, cmd);
}

/**
 * \brief Check whether a device answers in PCIe config space.
 *
 * NOTE(review): for function != 0 this returns true even when the vendor ID
 * reads 0xffff — presumably deliberate (other functions of a multi-function
 * device may still exist), but worth confirming against the callers.
 */
static bool device_exists_pcie(struct pci_address* addr)
{
    pci_hdr0_t hdr;
    pci_hdr0_initialize(&hdr, *addr);

    pcie_enable();
    uint16_t pcie_vendor = pci_hdr0_vendor_id_rd(&hdr);
    if (pcie_vendor == 0xffff) {
        if (addr->function == 0) {
            return false;
        }
    }
    return true;
}

/**
 * \brief Walk a device's capability lists looking for SR-IOV.
 *
 * First walks the legacy capability list to detect PCI Express support, then
 * (if the bus is reachable through the PCIe memory-mapped config space) walks
 * the extended capability list for capability ID 16 (SR-IOV).
 *
 * \param ret_sr_iov_cap Initialized to point at the SR-IOV capability on success.
 * \return true iff an SR-IOV capability was found.
 */
static bool check_extendned_caps_for_sriov(struct pci_address* addr,
                                           pci_sr_iov_cap_t* ret_sr_iov_cap)
{
    pci_hdr0_t devhdr;
    pci_hdr0_initialize(&devhdr, *addr);

    bool extended_caps = false;
    pcie_enable();

    // Process device capabilities if existing
    if (pci_hdr0_status_rd(&devhdr).caplist) {
        uint8_t cap_ptr = pci_hdr0_cap_ptr_rd(&devhdr);

        // Walk capabilities list
        while (cap_ptr != 0) {
            assert(cap_ptr % 4 == 0 && cap_ptr >= 0x40);
            uint32_t capword = pci_read_conf_header(addr, cap_ptr / 4);

            switch (capword & 0xff) {
            case 0x10: // PCI Express
                extended_caps = true;
                break;
            default:
                break;
            }
            cap_ptr = (capword >> 8) & 0xff; // next pointer lives in byte 1
        }
    } else {
        return false;
    }

    // Process extended device capabilities if existing
    if (extended_caps && addr->bus < pcie_get_endbus()) {
        uint32_t *ad_int = (uint32_t *) pcie_confspace_access(*addr);
        assert(ad_int != NULL);
        uint16_t cap_ptr = 0x100; // extended caps start at offset 0x100

        while (cap_ptr != 0) {
            uint32_t capword = *(ad_int + (cap_ptr / 4));
            assert(cap_ptr % 4 == 0 && cap_ptr >= 0x100 && cap_ptr < 0x1000);

            switch (capword & 0xffff) { // Switch on capability ID
            case 0: // No extended caps
                break;
            case 16: // SR-IOV
                pci_sr_iov_cap_initialize(ret_sr_iov_cap,
                        (mackerel_addr_t) (ad_int + (cap_ptr / 4)));
                return true;
            }
            cap_ptr = capword >> 20; // next pointer in the top 12 bits
        }
    }
    return false;
}

/**
 * \brief Enable the virtual functions of an SR-IOV capable device.
 *
 * Programs NumVFs (bounded by max_numvfs), sets VF MSE and VF Enable, then
 * waits the 100ms the SR-IOV spec requires before VFs may be accessed.
 */
static void pci_enable_vfs(struct pci_address* addr, pci_sr_iov_cap_t* sr_iov_cap)
{
    pcie_enable();
    PCI_DEBUG("Enable Virtual Function for device bus=%d, device=%d, function %d\n",
              addr->bus, addr->device, addr->function);

    // Support version 1 for the moment
    assert(pci_sr_iov_cap_hdr_ver_rdf(sr_iov_cap) == 1);

    // Support system page size of 4K at the moment
    assert(pci_sr_iov_cap_sys_psize_rd(sr_iov_cap) == 1);

    // Set maximum number of VFs (Has to be done before enabling)
    uint16_t totalvfs = pci_sr_iov_cap_totalvfs_rd(sr_iov_cap);
    pci_sr_iov_cap_numvfs_wr(sr_iov_cap, MIN(max_numvfs, totalvfs));

    // Start VFs (including memory spaces)
    pci_sr_iov_cap_ctrl_vf_mse_wrf(sr_iov_cap, 1);
    pci_sr_iov_cap_ctrl_vf_enable_wrf(sr_iov_cap, 1);

    // Spec says to wait here for at least 100ms
    errval_t err = barrelfish_usleep(100000);
    assert(err_is_ok(err));
}

/**
 * \brief Probe the VF BARs of an SR-IOV capability and add them to the SKB.
 *
 * For each implemented VF BAR the size is probed (write-all-ones, read back,
 * restore); a VF's region starts at origbase + size * vfn.  64-bit BARs
 * consume two consecutive registers and are handled as a pair.
 */
static void pci_add_vf_bars_to_skb(struct pci_address* vf_addr, uint32_t vfn,
                                   pci_sr_iov_cap_t* sr_iov_cap)
{
    pcie_enable();

    pci_hdr0_bar32_t bar, barorigaddr;

    for (int i = 0; i < pci_sr_iov_cap_vf_bar_length; i++) {
        union pci_hdr0_bar32_un orig_value;
        orig_value.raw = pci_sr_iov_cap_vf_bar_rd(sr_iov_cap, i);
        barorigaddr = orig_value.val;

        // probe BAR to see if it is implemented
        pci_sr_iov_cap_vf_bar_wr(sr_iov_cap, i, BAR_PROBE);

        union pci_hdr0_bar32_un bar_value;
        bar_value.raw = pci_sr_iov_cap_vf_bar_rd(sr_iov_cap, i);
        bar = (union pci_hdr0_bar32_un ) { .raw =bar_value.raw }.val;

        //write original value back to the BAR
        pci_sr_iov_cap_vf_bar_wr(sr_iov_cap, i, orig_value.raw);

        /*
         * We need to check the entire register here to make sure the bar
         * is not implemented as it could lie in the high 64 bit range...
         */
        if (bar_value.raw == 0) {
            // BAR not implemented
            continue;
        }

        // SR-IOV doesn't support IO space BARs
        assert(bar.space == 0);
        int type = -1;
        if (bar.tpe == pci_hdr0_bar_32bit) {
            type = 32;
        }
        if (bar.tpe == pci_hdr0_bar_64bit) {
            type = 64;
        }

        if (bar.tpe == pci_hdr0_bar_64bit) {
            //we must take the next BAR into account and do the same
            //tests like in the 32bit case, but this time with the combined
            //value from the current and the next BAR, since a 64bit BAR
            //is constructed out of two consequtive 32bit BARs

            //read the upper 32bits of the address
            uint32_t orig_value_high = pci_sr_iov_cap_vf_bar_rd(sr_iov_cap,
                                                                i + 1);
            // probe BAR to determine the mapping size
            pci_sr_iov_cap_vf_bar_wr(sr_iov_cap, i + 1, BAR_PROBE);

            // read the size information of the bar
            uint32_t bar_value_high = pci_sr_iov_cap_vf_bar_rd(sr_iov_cap,
                                                               i + 1);

            //write original value back to the BAR
            pci_sr_iov_cap_vf_bar_wr(sr_iov_cap, i + 1, orig_value_high);

            pciaddr_t base64 = 0, origbase64 = 0;
            base64 = bar_value_high;
            base64 <<= 32;
            base64 |= (uint32_t) (bar.base << 7); // undo Mackerel's >>7 of the base field

            origbase64 = orig_value_high;
            origbase64 <<= 32;
            origbase64 |= (uint32_t) (barorigaddr.base << 7);

            PCI_DEBUG("(%u,%u,%u): 64bit BAR %d at 0x%" PRIxPCIADDR
                      ", size %" PRIx64 ", %s\n",
                      vf_addr->bus, vf_addr->device, vf_addr->function, i,
                      origbase64 + bar_mapping_size64(base64)*vfn,
                      bar_mapping_size64(base64),
                      (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"));

            skb_add_fact("bar(addr(%u, %u, %u), %d, 16'%"PRIxPCIADDR", "
                         "16'%" PRIx64 ", mem, %s, %d).",
                         vf_addr->bus, vf_addr->device, vf_addr->function, i,
                         origbase64 + bar_mapping_size64(base64)*vfn,
                         bar_mapping_size64(base64),
                         (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"),
                         type);

            i++; //step one forward, because it is a 64bit BAR

        } else {
            PCI_DEBUG("(%u,%u,%u): 32bit BAR %d at 0x%" PRIx32 ", size %x, %s\n",
                      vf_addr->bus, vf_addr->device, vf_addr->function, i,
                      (barorigaddr.base << 7) + bar_mapping_size(bar) * vfn,
                      bar_mapping_size(bar), "nonprefetchable");
            //(bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"));

            //32bit BAR
            // XXX make every 32 bit bar noprefetchable. Only graphics cards have
            // 32 bit prefetchable bars ...
            skb_add_fact("bar(addr(%u, %u, %u), %d, 16'%"PRIx32", 16'%" PRIx32
                         ", vf, %s, %d).",
                         vf_addr->bus, vf_addr->device, vf_addr->function, i,
                         (uint32_t) ((barorigaddr.base << 7) + bar_mapping_size(
                                 bar) * vfn),
                         (uint32_t) bar_mapping_size(bar),
                         "nonprefetchable" ,type);
        }
    }
}

/**
 * \brief Compute the config-space address of a virtual function.
 *
 * Per the SR-IOV spec: VF routing ID = PF routing ID + First VF Offset +
 * VF Stride * vf_number; overflow of the 8-bit devfn spills into the bus
 * number.
 */
static void pci_get_vf_addr(struct pci_address* addr, uint32_t vf_number,
                            pci_sr_iov_cap_t* sr_iov_cap,
                            struct pci_address* vf_addr)
{
    uint16_t offset = pci_sr_iov_cap_offset_rd(sr_iov_cap);
    uint16_t stride = pci_sr_iov_cap_stride_rd(sr_iov_cap);

    uint8_t busnr = addr->bus + ((((addr->device << 3) + addr->function) +
                    offset + stride * vf_number) >> 8);
    uint8_t devfn = (((addr->device << 3) + addr->function) + offset +
                    stride * vf_number) & 0xff;

    vf_addr->bus = busnr;
    vf_addr->device = devfn >> 3;
    vf_addr->function = devfn & 7;

    PCI_DEBUG("VF (bus=%d, device=%d, function=%d) offset is 0x%x, stride is 0x%x\n",
              vf_addr->bus, vf_addr->device, vf_addr->function, offset, stride);
}

/*
static errval_t pci_add_vf_to_skb(struct pci_address* addr,
                                  pci_sr_iov_cap_t* sr_iov_cap,
                                  struct pci_address* vf_addr)
{
    errval_t err;
    // PCI header (classcode)
    pci_hdr0_t devhdr;
    pci_hdr0_initialize(&devhdr, *addr);
    pci_hdr0_class_code_t classcode =
pci_hdr0_class_code_rd(&devhdr);
    uint32_t vendor = pci_hdr0_vendor_id_rd(&devhdr);
    uint16_t vf_devid = pci_sr_iov_cap_devid_rd(sr_iov_cap);

    // octopus start
    char* device_fmt = "hw.pci.device. { "
                       "bus: %u, device: %u, function: %u, "
                       "vendor: %u, device_id: %u, class: %u, "
                       "subclass: %u, prog_if: %u }";
    err = oct_mset(SET_SEQUENTIAL, device_fmt,
                   vf_addr->bus, vf_addr->device, vf_addr->function,
                   vendor, vf_devid, classcode.clss,
                   classcode.subclss, classcode.prog_if);
    return err;
}
*/

/**
 * \brief Initialize a virtual function like a regular device.
 *
 * Reads the PF's vendor ID and the VF device ID from the SR-IOV capability
 * and delegates to device_init() at the VF's config-space address, which
 * allocates the VF's BAR caps and enables bus mastering.
 */
static errval_t pci_init_vf(struct pci_address* addr,
                            pci_sr_iov_cap_t* sr_iov_cap,
                            struct pci_address* vf_addr)
{
    errval_t err;

    // PCI header (classcode)
    pci_hdr0_t devhdr;
    pci_hdr0_initialize(&devhdr, *addr);

    uint32_t vendor = pci_hdr0_vendor_id_rd(&devhdr);
    uint16_t vf_devid = pci_sr_iov_cap_devid_rd(sr_iov_cap);

    // TODO might need this
    int num_bars;
    bool pcie;
    uint32_t bus = vf_addr->bus;
    uint32_t device = vf_addr->device;
    uint32_t function = vf_addr->function;
    err = device_init(PCI_DONT_CARE, PCI_DONT_CARE, PCI_DONT_CARE, vendor,
                      vf_devid, &bus, &device, &function, &pcie, &num_bars);
    return err;
}

/**
 * \brief Query the TotalVFs field of a device's SR-IOV capability.
 *
 * \return PCI_ERR_SRIOV_NOT_SUPPORTED if the device does not exist in PCIe
 *         config space or has no SR-IOV capability.
 */
static errval_t pci_get_max_vfs_for_device(struct pci_address* addr,
                                           uint32_t* max_vfs)
{
    // PCIE always enable for SRIOV
    pcie_enable();

    // Check if device exists
    if(!device_exists_pcie(addr)) {
        return PCI_ERR_SRIOV_NOT_SUPPORTED;
    }

    // Check if SR-IOV capable
    pci_sr_iov_cap_t sr_iov_cap;
    if (!check_extendned_caps_for_sriov(addr, &sr_iov_cap)) {
        return PCI_ERR_SRIOV_NOT_SUPPORTED;
    }

    *max_vfs = pci_sr_iov_cap_totalvfs_rd(&sr_iov_cap);
    return SYS_ERR_OK;
}

/**
 * \brief Compute the config-space address of VF number vf_num of a device.
 *
 * \return PCI_ERR_SRIOV_MAX_VF if vf_num is out of range, or
 *         PCI_ERR_SRIOV_NOT_SUPPORTED if the device has no SR-IOV capability.
 */
errval_t pci_get_vf_addr_of_device(struct pci_address addr, uint32_t vf_num,
                                   struct pci_address* vf_addr)
{
    // PCIE always enable for SRIOV
    pcie_enable();

    // Check if device exists
    if(!device_exists_pcie(&addr)) {
        return PCI_ERR_SRIOV_NOT_SUPPORTED;
    }

    // Check if SR-IOV capable
    pci_sr_iov_cap_t sr_iov_cap;
    if (!check_extendned_caps_for_sriov(&addr, &sr_iov_cap)) {
        return PCI_ERR_SRIOV_NOT_SUPPORTED;
    }

    uint32_t max_vfs= pci_sr_iov_cap_totalvfs_rd(&sr_iov_cap);
    if (vf_num >= max_vfs) {
        return PCI_ERR_SRIOV_MAX_VF;
    }

    pci_get_vf_addr(&addr, vf_num, &sr_iov_cap, vf_addr);
    return SYS_ERR_OK;
}

/**
 * \brief Bring up one virtual function of an SR-IOV device.
 *
 * Locates the SR-IOV capability, computes the VF's address and initializes
 * the VF (BAR caps, bus mastering) via pci_init_vf().
 */
errval_t pci_start_virtual_function_for_device(struct pci_address* addr,
                                               uint32_t vf_number)
{
    // PCIE always enable for SRIOV
    pcie_enable();

    // Check if device exists
    if(!device_exists_pcie(addr)) {
        return PCI_ERR_SRIOV_NOT_SUPPORTED;
    }

    // Check if SR-IOV capable
    pci_sr_iov_cap_t sr_iov_cap;
    if (!check_extendned_caps_for_sriov(addr, &sr_iov_cap)) {
        return PCI_ERR_SRIOV_NOT_SUPPORTED;
    }

    struct pci_address vf_addr;
    pci_get_vf_addr(addr, vf_number, &sr_iov_cap, &vf_addr);

    errval_t err;
    err = pci_init_vf(addr, &sr_iov_cap, &vf_addr);
    if (err_is_fail(err)) {
        return err;
    }

    return SYS_ERR_OK;
}

#if 0
static errval_t add_pci_model_node(struct pci_address addr)
{
    errval_t err = SYS_ERR_OK;
    if( (addr.bus == 10 && addr.device == 0 && addr.function == 0) ||
        (addr.bus == 4 && addr.device == 0 && addr.function == 0) ){
        HWMODEL_QUERY_DEBUG(
            "state_get(S),"
            "add_pci(S, addr(%u,%u,%u), E1, NewS),"
            "writeln(E1),"
            "state_set(NewS)",
            addr.bus, addr.device, addr.function);
        err = skb_execute_query(
            "state_get(S),"
            "add_pci(S, addr(%u,%u,%u), E1, NewS),"
            "writeln(E1),"
            "state_set(NewS)",
            addr.bus, addr.device, addr.function);
        if(err_is_fail(err)){
            DEBUG_SKB_ERR(err, "add_pci");
        }
        debug_printf("Allocated model node=%s, for PCI device (%u,%u,%u)\n",
                     skb_get_output(), addr.bus, addr.device, addr.function);
    } else {
        PCI_DEBUG("Not adding model node for addr(%u,%u,%u)\n",
                  addr.bus, addr.device, addr.function);
    }
    return err;
}
#endif

/**
 * This function performs a recursive, depth-first search through the
 * PCI hierarchy starting at parentaddr (this should initially be a
 * PCI root complex), with bus number A. It enters whatever it
 * discovers (bridges and devices) into the SKB.
 *
 * Refer to http://www.tldp.org/LDP/tlk/dd/pci.html for an overview of
 * a similar discovery algorithm.
 *
 * Upon discovery of a bridge, it sets the bridge's primary bus number
 * to A and assigns a secondary bus number of A + 2. The subordinate
 * bus number is set to A + 3. This way, buses are spaced 2 apart,
 * which is sometimes required for SR-IOV hot-plugged buses.
 */
static void assign_bus_numbers(struct pci_address parentaddr, uint8_t *busnum,
                               uint8_t maxchild, char* handle)
{
    struct pci_address addr = { .bus = parentaddr.bus };

    pcie_enable();

    errval_t err;

    // First go through all bridges on this bus and disable them
    for (addr.device = 0; addr.device < PCI_NDEVICES; addr.device++) {
        for (addr.function = 0; addr.function < PCI_NFUNCTIONS; addr.function++) {

            pci_hdr1_t bhdr;
            pci_hdr1_initialize(&bhdr, addr);

            uint16_t vendor = pci_hdr1_vendor_id_rd(&bhdr);
            if (vendor == 0xffff) {
                if (addr.function == 0) {
                    // this device doesn't exist at all
                    break;
                } else {
                    // this function doesn't exist, but there may be others
                    continue;
                }
            }

            pci_hdr1_hdr_type_t hdr_type = pci_hdr1_hdr_type_rd(&bhdr);
            if (hdr_type.fmt == pci_hdr1_pci2pci) {
                PCI_DEBUG("Disabling bridge (%u,%u,%u)\n", addr.bus,
                          addr.device, addr.function);
                // Zeroed bus numbers take the bridge out of routing until
                // it is re-programmed below
                pci_hdr1_bcfg_t bcfg = pci_hdr1_bcfg_rd(&bhdr);
                bcfg.pri_bus = 0;
                bcfg.sec_bus = 0;
                bcfg.sub_bus = 0;
                pci_hdr1_bcfg_wr(&bhdr, bcfg);
            }
        }
    }

    // Now enumerate every device/function on this bus
    for (addr.device = 0; addr.device < PCI_NDEVICES; addr.device++) {
        for (addr.function = 0; addr.function < PCI_NFUNCTIONS; addr.function++) {

            pci_hdr0_t hdr;
            pci_hdr0_initialize(&hdr, addr);

            pcie_enable();
            uint16_t pcie_vendor = pci_hdr0_vendor_id_rd(&hdr);
            uint16_t vendor = pcie_vendor;
            bool pcie = true;
            bool extended_caps = false; // Whether to scan for PCI Express extended caps

            // Disable PCIe if device exists only in PCI
            if (pcie_vendor != 0xffff) {
                vendor = pcie_vendor;
                pcie = true;
            } else {
                pcie_disable();
                vendor = pci_hdr0_vendor_id_rd(&hdr);
                pcie = false;
            }

            if (vendor == 0xffff) {
                if (addr.function == 0) {
                    // this device doesn't exist at all
                    break;
                } else {
                    // this function doesn't exist, but there may be others
                    continue;
                }
            }

            pci_hdr0_class_code_t classcode = pci_hdr0_class_code_rd(&hdr);
            uint16_t device_id = pci_hdr0_device_id_rd(&hdr);

            /* Disable all decoders for this device,
             * they will be re-enabled as devices are setup.
             * NB: we are using "pci_hdr1" here, but the command field is
             * common to all configuration header types.
             */
            /* PCI_DEBUG("disabling decoders for (%hhu,%hhu,%hhu)\n", */
            /*           addr.bus, addr.device, addr.function); */
            pci_hdr0_command_t cmd = pci_hdr0_command_rd(&hdr);
            cmd.mem_space = 0;
            cmd.io_space = 0; // XXX: not handled in setup yet
            // Ticket #210
            //XXX: This should be set to 0 and only enabled if needed
            //     (whenever a driver attaches to a device).
            //     For bridges the pci driver enables the bit later when
            //     programming the bridge window
            // cmd.master = 0;
            // Ticket 229
            //pci_hdr0_command_wr(&hdr, cmd);

            // do we have a bridge?
            pci_hdr0_hdr_type_t hdr_type = pci_hdr0_hdr_type_rd(&hdr);
            if (hdr_type.fmt == pci_hdr0_pci2pci) {
                pci_hdr1_t bhdr;
                pci_hdr1_initialize(&bhdr, addr);

                //ACPI_HANDLE child;
                char* child = malloc(acpi__read_irq_table_response_child_MAX_ARGUMENT_SIZE);
                assert(child);
                errval_t error_code;
                PCI_DEBUG("get irq table for (%hhu,%hhu,%hhu)\n", (*busnum) + 2,
                          addr.device, addr.function);

                struct acpi_binding* cl = get_acpi_binding();
                // XXX: why do we have two different types for the same thing?
                acpi_pci_address_t xaddr = {
                    .bus = addr.bus,
                    .device = addr.device,
                    .function = addr.function,
                };
                err = cl->rpc_tx_vtbl.read_irq_table(cl, handle, xaddr,
                                                     (*busnum) + 2,
                                                     &error_code, child);
                if (err_is_ok(err) && error_code == ACPI_ERR_NO_CHILD_BRIDGE){
                    PCI_DEBUG("No corresponding ACPI entry for bridge found\n");
                } else if (err_is_fail(err) || err_is_fail(error_code)) {
                    DEBUG_ERR(error_code, "Reading IRQs failed");
                }

                // Increase by 2 to leave room for SR-IOV
                (*busnum) += 2;
                //assert(*busnum <= maxchild);
                PCI_DEBUG("program busses for bridge (%hhu,%hhu,%hhu)\n"
                          "primary: %hhu, secondary: %hhu, subordinate: %hhu\n",
                          addr.bus, addr.device, addr.function, addr.bus,
                          *busnum, (*busnum) + 1);

                // Disable master abort mode on the bridge
                pci_hdr1_brdg_ctrl_mabort_wrf(&bhdr, 0);

                // Clear all errors
                pci_hdr1_status_wr_raw(&bhdr, 0);

                // program bus numbers for this bridge; sub_bus is maxed out
                // (0xff) during the recursion and tightened afterwards
                pci_hdr1_bcfg_t bcfg = pci_hdr1_bcfg_rd(&bhdr);
                bcfg.pri_bus = addr.bus;
                bcfg.sec_bus = *busnum;
                bcfg.sub_bus = 0xff;
                pci_hdr1_bcfg_wr(&bhdr, bcfg);

                skb_add_fact("bridge(%s,addr(%u,%u,%u),%u,%u,%u,%u,%u, secondary(%hhu)).",
                             (pcie ? "pcie" : "pci"), addr.bus, addr.device,
                             addr.function, vendor, device_id, classcode.clss,
                             classcode.subclss, classcode.prog_if, *busnum);

                //use the original hdr (pci_hdr0_t) here
                query_bars(hdr, addr, true);

                // assign bus numbers to secondary bus
                struct pci_address bridge_addr = {
                    .bus = *busnum, .device = addr.device,
                    .function = addr.function
                };
                assign_bus_numbers(bridge_addr, busnum, maxchild, child);

                // Restore the old state of pcie. The above call changes this
                // state according to the devices under this bridge
                if (pcie) {
                    pcie_enable();
                } else {
                    pcie_disable();
                }

                // Set this bridge's subordinate to the maximum of the underlying hierarchy
                pci_hdr1_bcfg_sub_bus_wrf(&bhdr, (*busnum) + 1);
            }

            //is this a normal PCI device?
            if (hdr_type.fmt == pci_hdr0_nonbridge) {
                PCI_DEBUG("Found device (%u, %u, %u), vendor = %x, device = %x\n",
                          addr.bus, addr.device, addr.function, vendor,
                          device_id);

                pci_hdr0_t devhdr;
                pci_hdr0_initialize(&devhdr, addr);

                err = skb_add_fact("device(%s,addr(%u,%u,%u),%u,%u,%u, %u, %u, %d).",
                                   (pcie ? "pcie" : "pci"), addr.bus,
                                   addr.device, addr.function, vendor,
                                   device_id, classcode.clss,
                                   classcode.subclss, classcode.prog_if,
                                   pci_hdr0_int_pin_rd(&devhdr) - 1);
                if(err_is_fail(err)){
                    USER_PANIC_SKB_ERR(err, "add device fact");
                }

                // octopus start
                static char* device_fmt = "hw.pci.device. { "
                                          "bus: %u, device: %u, function: %u, "
                                          "vendor: %u, device_id: %u, class: %u, "
                                          "subclass: %u, prog_if: %u }";
                // TODO: Figure out why these tries are necessary.
                int tries = 3;
                while(tries-->0){
                    err = oct_mset(SET_SEQUENTIAL, device_fmt,
                                   addr.bus, addr.device, addr.function,
                                   vendor, device_id, classcode.clss,
                                   classcode.subclss, classcode.prog_if);
                    if(err_is_ok(err)) break;
                    if(tries == 0) {
                        USER_PANIC_ERR(err, "oct_mset");
                    }
                    PCI_DEBUG("oct_mset failed. Retrying...");
                }
                // end octopus

                query_bars(devhdr, addr, false);

                // Process device capabilities if existing
                if (pci_hdr0_status_rd(&devhdr).caplist) {
                    uint8_t cap_ptr = pci_hdr0_cap_ptr_rd(&devhdr);

                    // Walk capabilities list
                    while (cap_ptr != 0) {
                        assert(cap_ptr % 4 == 0 && cap_ptr >= 0x40);
                        uint32_t capword = pci_read_conf_header(&addr,
                                                                cap_ptr / 4);

                        switch (capword & 0xff) {
                        case 0x10: // PCI Express
                            PCI_DEBUG("PCI Express device\n");
                            extended_caps = true;
                            break;
                        default:
                            PCI_DEBUG("Unknown PCI device capability 0x%x at 0x%x\n",
                                      capword & 0xff, cap_ptr);
                            break;
                        }

                        cap_ptr = (capword >> 8) & 0xff;
                    }
                }

                // Process extended device capabilities if existing
                if (pcie && extended_caps && addr.bus < pcie_get_endbus()) {
                    uint32_t *ad = (uint32_t *) pcie_confspace_access(addr);
                    assert(ad != NULL);
                    uint16_t cap_ptr = 0x100;

                    while (cap_ptr != 0) {
                        uint32_t capword = *(ad + (cap_ptr / 4));
                        assert(cap_ptr % 4 == 0 && cap_ptr >= 0x100
                               && cap_ptr < 0x1000);

                        switch (capword & 0xffff) { // Switch on capability ID
                        case 0: // No extended caps
                            break;
                        case 16: // SR-IOV
                            if (vendor == 0x8086 && (device_id & 0xFFF0) == 0x1520) {
                                debug_printf("skipping SR IOV initialization"
                                             "for e1000 card.\n");
                                break;
                            }
                            // Enable VFs only when the num_vfs argument is given
                            if (!enable_vfs) {
                                debug_printf("############ skipping SR IOV initialization "
                                             "for device (bus=%d dev=%d fun=%d).########## \n",
                                             addr.bus, addr.device, addr.function);
                                break;
                            }

                            pci_sr_iov_cap_t sr_iov_cap;
                            pci_sr_iov_cap_initialize(&sr_iov_cap,
                                    (mackerel_addr_t) (ad + (cap_ptr / 4)));

                            pci_enable_vfs(&addr, &sr_iov_cap);

                            uint32_t total_vfs;
                            err = pci_get_max_vfs_for_device(&addr, &total_vfs);
                            assert(err_is_ok(err));
                            if (err_is_fail(err)) {
                                USER_PANIC_ERR(err, "%s", __FUNCTION__);
                            }

                            // Add fake bridge for Bridge programming algo
                            // bus + 1 so VFs are taken into account
                            if (addr.function == 0) {
                                PCI_DEBUG("bridge(%s,addr(%d,%d,%d),%u,%u,%u,%u,%u, secondary(%hhu)).",
                                          (pcie ? "pcie" : "pci"), 0, 257, 0,
                                          vendor, device_id, 0, 0, 0,
                                          addr.bus + 1);
                                skb_add_fact("bridge(%s,addr(%d,%d,%d),%u,%u,%u,%u,%u, secondary(%hhu)).",
                                             (pcie ? "pcie" : "pci"), 0, 257, 0,
                                             vendor, device_id, 0, 0, 0,
                                             addr.bus + 1);
                            }

                            struct pci_address vf_addr;
                            uint32_t vfs = MIN(max_numvfs, total_vfs);
                            // NOTE(review): signed int vfn compared against
                            // unsigned vfs — fine for realistic VF counts,
                            // but a uint32_t loop variable would be cleaner.
                            for (int vfn=0; vfn < vfs; vfn++) {
                                pci_get_vf_addr(&addr, vfn, &sr_iov_cap, &vf_addr);

                                uint16_t vf_devid = pci_sr_iov_cap_devid_rd(&sr_iov_cap);

                                // Add device as an SKB fact but do not add the octopus record yet,
                                // required for rest of the pci code
                                skb_add_fact("device(pcie,addr(%u,%u,%u),%u,%u,%u, %u, %u, %d).",
                                             vf_addr.bus, vf_addr.device,
                                             vf_addr.function, vendor, vf_devid,
                                             classcode.clss, classcode.subclss,
                                             classcode.prog_if, 0);

                                pci_add_vf_bars_to_skb(&vf_addr, vfn, &sr_iov_cap);
                            }
                            break;
                        default:
                            PCI_DEBUG("Unknown extended PCI device capability 0x%x at 0x%x\n",
                                      capword & 0xffff, cap_ptr);
                            break;
                        }

                        cap_ptr = capword >> 20;
                    }
                }
            }

            if(hdr_type.fmt == pci_hdr0_cardbus) {
                printf("PCI: WARNING: Found cardbus bridge.\n");
            }

            // is this a multi-function device?
            if (addr.function == 0 && !hdr_type.multi) {
                break;
            }
        }
    }
    free(handle); // ACPI handle string ownership ends here
}

#if 0
static void get_bridges(struct pci_address myad)
{
    struct pci_address addr = {.bus = myad.bus};

    pcie_enable();

    // First go through all bridges on this bus and disable them
    for (addr.device = 0; addr.device < PCI_NDEVICES; addr.device++) {
        for (addr.function = 0; addr.function < PCI_NFUNCTIONS; addr.function++) {

            pci_hdr1_t bhdr;
            pci_hdr1_initialize(&bhdr, addr);

            uint16_t vendor = pci_hdr1_vendor_id_rd(&bhdr);
            if (vendor == 0xffff) {
                if (addr.function == 0) {
                    // this device doesn't exist at all
                    break;
                } else {
                    // this function doesn't exist, but there may be others
                    continue;
                }
            }
            pci_hdr1_hdr_type_t hdr_type = pci_hdr1_hdr_type_rd(&bhdr);
            if (hdr_type.fmt == pci_hdr1_pci2pci) {
                pci_hdr1_bcfg_t bcfg = pci_hdr1_bcfg_rd(&bhdr);
                PCI_DEBUG("Found bridge (%u,%u,%u), primary %u, secondary %u, subordinate %u\n",
                          addr.bus, addr.device, addr.function,
                          bcfg.pri_bus, bcfg.sec_bus, bcfg.sub_bus);
                struct pci_address bridge_addr= {
                    .bus = bcfg.sec_bus, .device = addr.device,
                    .function = addr.function
                };
                get_bridges(bridge_addr);
            }
        }
    }
}
#endif

/// Start depth-first enumeration at a root complex; takes ownership of handle.
void pci_add_root(struct pci_address addr, uint8_t maxchild, char* handle)
{
    uint8_t busnum = addr.bus;
    /* get_bridges(addr); */
    assign_bus_numbers(addr, &busnum, maxchild, handle);
    /* get_bridges(addr); */
}

/**
 * \brief Enumerate all PCI root complexes registered in octopus.
 *
 * Reads every hw.pci.rootbridge.N record and walks each hierarchy via
 * pci_add_root() with PCIe config access enabled.
 */
errval_t pci_setup_root_complex(void)
{
    errval_t err;
    char* record = NULL;
    char** names = NULL;
    size_t len = 0;
    // TODO: react to new rootbridges
    err = oct_get_names(&names, &len, "r'hw.pci.rootbridge.[0-9]+' "
                        "{ acpi_node: _, bus: _, device: _, function: _, maxbus: _ }");
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "get names");
        goto out;
    }

    for (size_t i = 0; i < len; i++) {
        err = oct_get(&record, names[i]);
        if (err_is_fail(err)) {
            goto out;
        }

        PCI_DEBUG("found new root complex: %s\n", record);

        char* acpi_node = NULL; // freed in pci_add_root
        int64_t bus, device, function, maxbus;
        static char* format = "_ { acpi_node: %s, bus: %d, device: %d, function: %d, maxbus: %d }";
        err = oct_read(record, format, &acpi_node, &bus, &device, &function,
                       &maxbus);
        if (err_is_fail(err)) {
            free(acpi_node);
            free(record);
            goto out;
        }

        struct pci_address addr;
        addr.bus = (uint8_t) bus;
        addr.device = (uint8_t) device;
        addr.function = (uint8_t) function;

        pcie_enable();
        pci_add_root(addr, maxbus, acpi_node);
        pcie_disable();
    }

out:
    oct_free_names(names, len);
    return err;
}

//query all BARs. That means, get the original address, the mapping size
//and all attributes.
// XXX: asq: We are using this function to program also the _two_ BARs
//      of a PCI-to-PCI bridge. They are at the same offset within the
//      PCI header like for any PCI device. PCI HDR0 is misused
//      here for the bridges.
static void query_bars(pci_hdr0_t devhdr, struct pci_address addr,
                       bool pci2pci_bridge)
{
    pci_hdr0_bar32_t bar, barorigaddr;

    // NOTE(review): '<=' means non-bridge devices iterate
    // pci_hdr0_bars_length + 1 times; verify against the Mackerel array
    // length — this looks like an off-by-one (bridges want i in {0,1},
    // which '<= 1' gives correctly).
    int maxbars = pci2pci_bridge ? 1 : pci_hdr0_bars_length;
    for (int i = 0; i <= maxbars; i++) {
        union pci_hdr0_bar32_un orig_value;
        orig_value.raw = pci_hdr0_bars_rd(&devhdr, i);
        barorigaddr = orig_value.val;

        // probe BAR to determine the mapping size
        pci_hdr0_bars_wr(&devhdr, i, BAR_PROBE);

        uint32_t bar_value = pci_hdr0_bars_rd(&devhdr, i);
        bar = (union pci_hdr0_bar32_un ) { .raw = bar_value }.val;

        //write original value back to the BAR
        pci_hdr0_bars_wr(&devhdr, i, orig_value.raw);

        /*
         * Cannot just compare the base value, with addresses over 4G there
         * will be a problem. Thus we need to check if the entire register is
         * zero. If it is a 32bit register, then the address part will be filled.
         * If it is a 64bit register, the type will contain a nonzero value.
         * - 2014-05-02, RA
         */
        if (bar_value == 0) {
            // BAR not implemented
            continue;
        }

        if (bar.space == 0) { // memory mapped
            //bar(addr(bus, device, function), barnr, orig address, size, space,
            //    prefetchable?, 64bit?).
            //where space = mem | io, prefetchable= prefetchable | nonprefetchable,
            //64bit = 64bit | 32bit.
// decode the BAR type field: 32-bit or 64-bit memory decoder
int type = -1;
if (bar.tpe == pci_hdr0_bar_32bit) {
    type = 32;
}
if (bar.tpe == pci_hdr0_bar_64bit) {
    type = 64;
}

if (bar.tpe == pci_hdr0_bar_64bit) {
    //we must take the next BAR into account and do the same
    //tests like in the 32bit case, but this time with the combined
    //value from the current and the next BAR, since a 64bit BAR
    //is constructed out of two consecutive 32bit BARs

    //read the upper 32bits of the address
    uint32_t orig_value_high = pci_hdr0_bars_rd(&devhdr, i + 1);
    // probe BAR to determine the mapping size
    pci_hdr0_bars_wr(&devhdr, i + 1, BAR_PROBE);
    // read the size information of the bar
    uint32_t bar_value_high = pci_hdr0_bars_rd(&devhdr, i + 1);
    //write original value back to the BAR
    pci_hdr0_bars_wr(&devhdr, i + 1, orig_value_high);

    // combine the two 32-bit halves into one 64-bit address; the low
    // half's base field is shifted back up by 7 to undo the mackerel
    // field offset
    pciaddr_t base64 = 0, origbase64 = 0;
    base64 = bar_value_high;
    base64 <<= 32;
    base64 |= (uint32_t) (bar.base << 7);
    origbase64 = orig_value_high;
    origbase64 <<= 32;
    origbase64 |= (uint32_t) (barorigaddr.base << 7);

    // NOTE(review): the two debug prints below are near-identical; the
    // second applies an extra '<< 7' to origbase64, which looks like
    // leftover debugging — confirm which address form is correct.
    PCI_DEBUG("(%u,%u,%u): 64bit BAR %d at 0x%" PRIxPCIADDR ", size %" PRIx64 ", %s\n",
              addr.bus, addr.device, addr.function, i, origbase64,
              bar_mapping_size64(base64),
              (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"));
    PCI_DEBUG("(%u,%u,%u): 64bit BAR %d at 0x%" PRIxPCIADDR ", size %" PRIx64 ", %s\n",
              addr.bus, addr.device, addr.function, i, origbase64 << 7,
              bar_mapping_size64(base64),
              (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"));

    skb_add_fact("bar(addr(%u, %u, %u), %d, 16'%"PRIxPCIADDR", "
                 "16'%" PRIx64 ", mem, %s, %d).",
                 addr.bus, addr.device, addr.function, i,
                 origbase64, bar_mapping_size64(base64),
                 (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"),
                 type);
    i++; //step one forward, because it is a 64bit BAR
} else {
    PCI_DEBUG("(%u,%u,%u): 32bit BAR %d at 0x%" PRIx32 ", size %x, %s\n",
              addr.bus, addr.device, addr.function, i,
              barorigaddr.base << 7, bar_mapping_size(bar),
              (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"));
    //32bit BAR
    skb_add_fact("bar(addr(%u, %u, %u), %d, 16'%"PRIx32", 16'%" PRIx32 ", mem, %s, %d).",
                 addr.bus, addr.device, addr.function, i,
                 (uint32_t) (barorigaddr.base << 7),
                 (uint32_t) bar_mapping_size(bar),
                 (bar.prefetch == 1 ? "prefetchable" : "nonprefetchable"),
                 type);
}
        } else {
            // I/O space BAR
            PCI_DEBUG("(%u,%u,%u): IO BAR %d at 0x%x, size %x\n",
                      addr.bus, addr.device, addr.function, i,
                      barorigaddr.base << 7, bar_mapping_size(bar));
            //bar(addr(bus, device, function), barnr, orig address, size, space).
            //where space = mem | io
            skb_add_fact("bar(addr(%u, %u, %u), %d, 16'%"PRIx32", 16'%" PRIx32 ", io, "
                         "nonprefetchable, 32).",
                         addr.bus, addr.device, addr.function, i,
                         (uint32_t) (barorigaddr.base << 7),
                         (uint32_t) bar_mapping_size(bar));
        }
    }
}

/**
 * \brief Program the forwarding window registers of a PCI-to-PCI bridge.
 *
 * \param bus,dev,fun  config address of the bridge (header type 1)
 * \param base         window base; must be 1 MiB aligned (asserted)
 * \param high         window limit; must end on a 1 MiB boundary - 1 (asserted)
 * \param pcie         select PCIe vs. legacy PCI config access
 * \param mem          true = memory window, false = I/O (I/O is a no-op here)
 * \param pref         true = program the prefetchable window registers
 */
static void program_bridge_window(uint8_t bus, uint8_t dev, uint8_t fun,
                                  pciaddr_t base, pciaddr_t high,
                                  bool pcie, bool mem, bool pref)
{
    struct pci_address addr;
    pci_hdr1_prefbl_t pref_reg;
    pci_hdr1_command_t cmd;

    if (pcie) {
        pcie_enable();
    } else {
        pcie_disable();
    }

    // window granularity is 1 MiB: base aligned, limit inclusive
    assert((base & 0x000fffff) == 0);
    assert((high & 0x000fffff) == 0x000fffff);

    addr.bus = bus;
    addr.device = dev;
    addr.function = fun;

    pci_hdr1_t bridgehdr;
    pci_hdr1_initialize(&bridgehdr, addr);
    cmd = pci_hdr1_command_rd(&bridgehdr);

    if (mem) {
        if (pref) {
            pci_hdr1_pref_base_upper_wr(&bridgehdr, base >> 32);
            pci_hdr1_pref_limit_upper_wr(&bridgehdr, high >> 32);
            /*
             * The least significant nibble of this register value (1h)
             * indicates that a 64 bit address decoder is supported and
             * that the Upper Base/Limit Registers are also used.
*/
            if ((base >> 32)) {
                pref_reg.tpe = pci_hdr1_mem_64bit;
            } else {
                pref_reg.tpe = pci_hdr1_mem_32bit;
            }
            pref_reg.val = base >> 20;
            pci_hdr1_pref_base_wr(&bridgehdr, pref_reg);

            if ((high >> 32)) {
                pref_reg.tpe = pci_hdr1_mem_64bit;
            } else {
                pref_reg.tpe = pci_hdr1_mem_32bit;
            }
            pref_reg.val = high >> 20;
            pci_hdr1_pref_limit_wr(&bridgehdr, pref_reg);
        } else {
            // non-prefetchable window: 32-bit only (asserted)
            assert((base & 0xffffffff00000000) == 0);
            assert((high & 0xffffffff00000000) == 0);
            pci_hdr1_membl_t membl = {
                .base = base >> 16,
                .limit = high >> 16,
            };
            pci_hdr1_membl_wr(&bridgehdr, membl);
            /* pci_hdr1_mem_base_wr(&bridgehdr, base >> 16); */
            /* pci_hdr1_mem_limit_wr(&bridgehdr, high >> 16); */
        }
        // enable the memory decoder
        cmd.mem_space = 1;
    } else {
        // I/O window programming not implemented
    }

    cmd.int_dis = 0;
    cmd.master = 1;
    pci_hdr1_command_wr(&bridgehdr, cmd);
}

/**
 * \brief Write an address into one BAR of a device (header type 0).
 *
 * Disables the relevant address decoder while the BAR is rewritten and
 * re-enables it afterwards.
 *
 * \param bus,dev,fun  config address of the device
 * \param bar          BAR index; for bits == 64 both bar and bar+1 are written
 * \param base         address to program
 * \param size         size of the region (only used for the 32-bit range check)
 * \param bits         32 or 64 — width of the BAR being programmed
 * \param memspace     true = memory BAR, false = I/O BAR
 * \param pcie         select PCIe vs. legacy PCI config access
 */
static void program_device_bar(uint8_t bus, uint8_t dev, uint8_t fun, int bar,
                               pciaddr_t base, pcisize_t size, int bits,
                               bool memspace, bool pcie)
{
    struct pci_address addr;
    addr.bus = bus;
    addr.device = dev;
    addr.function = fun;

    if (pcie) {
        pcie_enable();
    } else {
        pcie_disable();
    }

    pci_hdr0_t devhdr;
    pci_hdr0_initialize(&devhdr, addr);

    //disable the address decoder for programming the BARs
    pci_hdr0_command_t cmd = pci_hdr0_command_rd(&devhdr);
    if (memspace) {
        cmd.mem_space = 0;
    } else {
        cmd.io_space = 0;
    }
    // disable interrupts here; enable them as soon as a driver requests
    // interrupts
    cmd.int_dis = 1;
    pci_hdr0_command_wr(&devhdr, cmd);

    if (bits == 64) {
        pci_hdr0_bars_wr(&devhdr, bar, base & 0xffffffff);
        pci_hdr0_bars_wr(&devhdr, bar + 1, base >> 32);
    } else {
        // 32-bit
        if (base + size > 0xffffffff) {
            // temporary workaround: refuse to program a 32-bit BAR above 4G
            debug_printf("%s: skipping base:%lx size:%lx top:%lx\n", __func__,
                         base, size, base + size);
            return;
        }
        assert(base + size <= 0xffffffff); // 32-bit BAR
        pci_hdr0_bars_wr(&devhdr, bar, base);
    }

    //re-enable the decoder for the BARs
    if (memspace) {
        cmd.mem_space = 1;
    } else {
        cmd.io_space = 1;
    }
    pci_hdr0_command_wr(&devhdr, cmd);
}

/**
 * \brief Set the bus-master bit in a device's command register.
 *
 * \param bus,dev,fun  config address of the device
 * \param pcie         select PCIe vs. legacy PCI config access
 */
static void enable_busmaster(uint8_t bus, uint8_t dev, uint8_t fun, bool pcie)
{
    struct pci_address addr;
    addr.bus = bus;
    addr.device = dev;
    addr.function = fun;

    if (pcie) {
        pcie_enable();
    } else {
        pcie_disable();
    }

    pci_hdr0_t devhdr;
    pci_hdr0_initialize(&devhdr, addr);

    //enable bus master
    pci_hdr0_command_t cmd = pci_hdr0_command_rd(&devhdr);
    cmd.master = 1;
    pci_hdr0_command_wr(&devhdr, cmd);
}

/**
 * \brief Run the SKB bridge-programming algorithm and apply its plan.
 *
 * Executes the Prolog bridge program, parses the returned buselement list
 * and programs device BARs and bridge windows accordingly.
 */
void pci_program_bridges(void)
{
    char element_type[7];   // "device" | "bridge"
    char bar_secondary[16]; //[0-6] | secondary(<0-255>)
    char space[4];          // "mem" | "io"
    char prefetch[16];      // "prefetchable" | "nonprefetchable"
    char pcie_pci[5];       // "pcie" | "pci"
    int bar;                // the value of bar_secondary after parsing secondary() to
    uint8_t bus, dev, fun;
    pciaddr_t base, high;
    pcisize_t size;
    int bits;
    bool mem, pcie, pref;

    char *output = NULL;
    int output_length = 0;
    int error_code = 0;

    /*
    output = NULL;
    output_length = 0;
    skb_execute("listing.");
    output = skb_get_output();
    assert(output != NULL);
    output_length = strlen(output);
    PCI_DEBUG("pci_program_bridges: output = %s\n", output);
    PCI_DEBUG("pci_program_bridges: output length = %d\n", output_length);
    error_code = skb_read_error_code();
    if (error_code != 0) {
        printf("pci.c: pci_program_bridges(): SKB returnd error code %d\n",
               error_code);
        const char *errout = skb_get_error_output();
        printf("\nSKB error returned: 
%s\n", errout);
        printf("\nSKB output: %s\n", output);
        // XXX: no device can be used...
        return;
    } */

    output = NULL;
    output_length = 0;
    char bridge_program[512];

    // bridge program itself already has been loaded before
    // decoding net has dependency to some infos of it
    debug_printf("PCI programming using: %s \n", skb_bridge_program);
    snprintf(bridge_program, 512,
             "bridge_programming(P, Nr),"
             "flatten(P, F),replace_current_BAR_values(F),"
             "write(nrelements(Nr)),writeln(P).");
    skb_execute(bridge_program);

    // copy of the SKB output; freed on every exit path below
    output = strdup(skb_get_output());
    assert(output != NULL);
    output_length = strlen(output);
    PCI_DEBUG("pci_program_bridges: output = %s\n", output);
    PCI_DEBUG("pci_program_bridges: output length = %d\n", output_length);

    error_code = skb_read_error_code();
    if (error_code != 0) {
        printf("pci.c: pci_program_bridges(): SKB returned error code %d\n",
               error_code);
        const char *errout = skb_get_error_output();
        printf("SKB error returned: %s\n", errout);
        printf("SKB output: %s\n", output);
        // XXX: no device can be used...
        printf("WARNING: CONTINUING, HOWEVER PCI DEVICES WILL BE UNUSABLE\n");
        // except IO-space devices which aren't yet affected by bridge programming
        free(output);
        return;
    }

    /*
    ********************************************************************************
    //for the ASPLOS11 paper:
    skb_execute("[bridge_page].");
    while (skb_read_error_code() == SKB_PROCESSING)
        messages_wait_and_handle_next();
    char *output = skb_get_output();
    ssert(output != NULL);
    int output_length = strlen(output);
    PCI_DEBUG("pci_program_bridges: output = %s\n", output);
    PCI_DEBUG("pci_program_bridges: output length = %d\n", output_length);
    int error_code = skb_read_error_code();
    if (error_code != 0) {
        printf("pci.c: pci_program_bridges() <2>: SKB returnd error code %d\n",
               error_code);
        const char *errout = skb_get_error_output();
        printf("\nSKB error returned <2>: %s\n", errout);
        printf("\nSKB output <2>: %s\n", output);
        // XXX: no device can be used...
        return;
    }
    uint64_t start =rdtsc();
//    uint64_t start =rdtscp();
    skb_execute("bridge_programming(P, Nr),write(nrelements(Nr)),writeln(P).");
    uint64_t end =rdtsc();
//    uint64_t end =rdtscp();
    assert(end >= start);
    printf("\n\nTicks: %lu\n\n", end - start);
    while (skb_read_error_code() == SKB_PROCESSING)
        messages_wait_and_handle_next();
    output = skb_get_output();
    assert(output != NULL);
    output_length = strlen(output);
    printf("pci_program_bridges: output = %s\n", output);
    PCI_DEBUG("pci_program_bridges: output length = %d\n", output_length);
    error_code = skb_read_error_code();
    if (error_code != 0) {
        printf("pci.c: pci_program_bridges() <3>: SKB returnd error code %d\n",
               error_code);
        const char *errout = skb_get_error_output();
        printf("\nSKB error returned <3>: %s\n", errout);
        printf("\nSKB output <3>: %s\n", output);
        // XXX: no device can be used...
        return;
    }
    ********************************************************************************
    */

    //get the number of buselements from the output
    int nr_elements;
    int nr_conversions;
    //keep a pointer to the current location within the output
    char *conv_ptr = output;

    // Skip any warnings from Prolog.
while ((conv_ptr < output + output_length)
       && (strncmp(conv_ptr, "nrelements", strlen("nrelements")) != 0)) {
    conv_ptr++;
}

nr_conversions = sscanf(conv_ptr, "nrelements(%d)", &nr_elements);
if (nr_conversions != 1) {
    // NOTE(review): the '.' after '\n' in this message looks misplaced.
    printf("pci.c: No valid pci plan returned by the SKB\n.");
    //XXX: no device can be used
    free(output);
    return;
}

//iterate over all buselements
for (int i = 0; i < nr_elements; i++) {
    // search the beginning of the next buselement
    while ((conv_ptr < output + output_length)
           && (strncmp(conv_ptr, "buselement", strlen("buselement")) != 0)) {
        conv_ptr++;
    }

    //convert the string to single elements and numbers
    // NOTE(review): the %[...] and %s conversions carry no field widths, so
    // an over-long SKB token would overflow the fixed-size buffers above —
    // confirm the SKB output is trusted/bounded.
    nr_conversions = sscanf(conv_ptr,
                            "buselement(%[a-z], addr(%hhu, %hhu, %hhu), "
                            "%[a-z0-9()], %"PRIuPCIADDR", %"PRIuPCIADDR", "
                            "%"PRIuPCISIZE", %[a-z], %[a-z], %[a-z], %d",
                            element_type, &bus, &dev, &fun, bar_secondary,
                            &base, &high, &size, space, prefetch, pcie_pci,
                            &bits);
    // bridge_page reports quantities in pages; scale to bytes
    if(strncmp("bridge_page", skb_bridge_program, strlen("bridge_page")) == 0){
        base *= BASE_PAGE_SIZE;
        high *= BASE_PAGE_SIZE;
        size *= BASE_PAGE_SIZE;
    }
    conv_ptr++;

    if (nr_conversions != 12) {
        printf("Could not parse output for device or bridge number %d\n"
               "nr conversions: %d \n", i, nr_conversions);
        continue;
    }

    if (strncmp(space, "mem", strlen("mem")) == 0) {
        mem = true;
    } else {
        mem = false;
    }
    if (strncmp(pcie_pci, "pcie", strlen("pcie")) == 0) {
        pcie = true;
    } else {
        pcie = false;
    }
    if (strncmp(prefetch, "prefetchable", strlen("prefetchable")) == 0) {
        pref = true;
    } else {
        pref = false;
    }

    if (strncmp(element_type, "device", strlen("device")) == 0) {
        nr_conversions = sscanf(bar_secondary, "%d", &bar);
        if (nr_conversions != 1) {
            printf("Could not determine BAR number while programming BAR\n");
            continue;
        }
        PCI_DEBUG("programming %s addr(%hhu, %hhu, %hhu), BAR %d, with base = "
                  "%"PRIxPCIADDR", high = %"PRIxPCIADDR", size = %"PRIxPCISIZE
                  " in" "space = %s, prefetch = %s, %s... Bits %d\n",
                  element_type, bus, dev, fun, bar, base, high, size, space,
                  prefetch, pcie ? "PCIe" : "PCI", bits);
        program_device_bar(bus, dev, fun, bar, base, size, bits, mem, pcie);
    } else {
        PCI_DEBUG("programming %s addr(%hhu, %hhu, %hhu), with base = "
                  "%"PRIxPCIADDR", high = %"PRIxPCIADDR", size = %"PRIxPCISIZE
                  " in space = %s, prefetch = %s... Bits %d\n",
                  element_type, bus, dev, fun, base, high, size, space,
                  prefetch, bits);
        if (size != 0) {
            //a bridge expects the high address excluding the last byte which
            //is the base for the next bridge => decrement by one
            high--;
            program_bridge_window(bus, dev, fun, base, high, pcie, mem, pref);
        }
    }
}

// add bridge bars to decoding net
if (decoding_net) {
    errval_t err;
    HWMODEL_QUERY_DEBUG(
        "state_get(S),"
        "add_all_pci(S, NewS),"
        "state_set(NewS).");
    err = skb_execute_query(
        "state_get(S),"
        "add_all_pci(S, NewS),"
        "state_set(NewS).");
    if(err_is_fail(err)){
        DEBUG_SKB_ERR(err, "add_pci");
    }
}
free(output);
}

/**
 * \brief Ask the SKB's irq_routing program for the interrupt of a device
 *        and configure it via ACPI.
 *
 * \param bus,dev,fun  config address of the device
 * \returns the IRQ/GSI number, or 0 if programming the link device failed
 */
uint32_t pci_setup_interrupt(uint32_t bus, uint32_t dev, uint32_t fun)
{
    char str[256], ldev[128];

    snprintf(str, 256,
             "[\"irq_routing.pl\"], assigndeviceirq(addr(%"PRIu32
             ", %"PRIu32", %"PRIu32")).", bus, dev, fun);

    char *output, *error_out;
    int32_t int_err;
    errval_t err = skb_evaluate(str, &output, &error_out, &int_err);
    assert(output != NULL);
    assert(err_is_ok(err));

    uint8_t irq;
    // NOTE(review): the return value of sscanf is unchecked; if the SKB
    // output does not match, 'irq' (and 'ldev') are used uninitialized.
    // The %s conversion is also unbounded for ldev[128].
    sscanf(output, "%s %hhu", ldev, &irq);

    // It's a GSI
    if (strcmp(ldev, "fixedGsi") == 0) {
        printf("Got GSI %u\n", irq);
        return irq;
    }

    struct acpi_binding* cl = get_acpi_binding();
    errval_t error_code;
    err = cl->rpc_tx_vtbl.set_device_irq(cl, ldev, irq, &error_code);
    assert(err_is_ok(err));
    if (err_is_fail(error_code)) {
        //DEBUG_ERR(error_code, "set device irq failed.");
        return 0;
    }

    return irq;
}