1139804Simp/*-- 298849Sken * Copyright (c) 1997, Duke University 398849Sken * All rights reserved. 498849Sken * 598849Sken * Author: 698849Sken * Andrew Gallatin <gallatin@cs.duke.edu> 798849Sken * 898849Sken * Redistribution and use in source and binary forms, with or without 998849Sken * modification, are permitted provided that the following conditions 1098849Sken * are met: 1198849Sken * 1. Redistributions of source code must retain the above copyright 1298849Sken * notice, this list of conditions and the following disclaimer. 1398849Sken * 2. Redistributions in binary form must reproduce the above copyright 1498849Sken * notice, this list of conditions and the following disclaimer in the 1598849Sken * documentation and/or other materials provided with the distribution. 1699479Sgallatin * 3. The name of Duke University may not be used to endorse or promote 1798849Sken * products derived from this software without specific prior written 1898849Sken * permission. 1998849Sken * 2098849Sken * THIS SOFTWARE IS PROVIDED BY DUKE UNIVERSITY ``AS IS'' AND ANY 2198849Sken * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2298849Sken * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 2398849Sken * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DUKE UNIVERSITY BE LIABLE 2498849Sken * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2598849Sken * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2698849Sken * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITSOR BUSINESS 2798849Sken * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 2898849Sken * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 2998849Sken * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 3098849Sken * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3198849Sken */ 32116182Sobrien 3398849Sken/* 3498849Sken * This is a set of routines for enabling and disabling copy on write 3598849Sken * protection for data written into sockets. 3698849Sken */ 3798849Sken 38116182Sobrien#include <sys/cdefs.h> 39116182Sobrien__FBSDID("$FreeBSD$"); 40116182Sobrien 4198849Sken#include <sys/param.h> 4298849Sken#include <sys/systm.h> 43219028Snetchild#include <sys/sysctl.h> 4498849Sken#include <sys/kernel.h> 4598849Sken#include <sys/proc.h> 4698849Sken#include <sys/lock.h> 4798849Sken#include <sys/mutex.h> 4898849Sken#include <sys/mbuf.h> 49122780Salc#include <sys/sf_buf.h> 5098849Sken#include <sys/socketvar.h> 5198849Sken#include <sys/uio.h> 5298849Sken 5398849Sken#include <vm/vm.h> 54151559Salc#include <vm/vm_extern.h> 5598849Sken#include <vm/vm_param.h> 5698849Sken#include <vm/pmap.h> 5798849Sken#include <vm/vm_map.h> 5898849Sken#include <vm/vm_page.h> 5998849Sken#include <vm/vm_object.h> 6098849Sken 61219028SnetchildFEATURE(zero_copy_sockets, "Zero copy sockets support"); 6298849Sken 6398849Skenstruct netsend_cow_stats { 6498849Sken int attempted; 6598849Sken int fail_not_mapped; 66124639Sgallatin int fail_sf_buf; 6798849Sken int success; 6898849Sken int iodone; 6998849Sken}; 7098849Sken 71124639Sgallatinstatic struct netsend_cow_stats socow_stats; 7298849Sken 7399008Salfredstatic void socow_iodone(void *addr, void *args); 7498849Sken 7598849Skenstatic void 7699008Salfredsocow_iodone(void *addr, void *args) 7798849Sken{ 7898849Sken struct sf_buf *sf; 7998849Sken vm_page_t pp; 8098849Sken 81112316Salc sf = args; 82122780Salc pp = sf_buf_page(sf); 83127150Salc sf_buf_free(sf); 8498849Sken /* remove COW mapping */ 85207548Salc vm_page_lock(pp); 8698849Sken vm_page_cowclear(pp); 87127150Salc vm_page_unwire(pp, 0); 88127150Salc /* 89127150Salc * Check for the object going away on us. This can 90127150Salc * happen since we don't hold a reference to it. 91127150Salc * If so, we're responsible for freeing the page. 92127150Salc */ 93127150Salc if (pp->wire_count == 0 && pp->object == NULL) 94127150Salc vm_page_free(pp); 95207548Salc vm_page_unlock(pp); 9698849Sken socow_stats.iodone++; 9798849Sken} 9898849Sken 9998849Skenint 10098849Skensocow_setup(struct mbuf *m0, struct uio *uio) 10198849Sken{ 10298849Sken struct sf_buf *sf; 10398849Sken vm_page_t pp; 10498849Sken struct iovec *iov; 10598849Sken struct vmspace *vmspace; 10698849Sken struct vm_map *map; 107147009Sgallatin vm_offset_t offset, uva; 108216699Salc vm_size_t len; 10998849Sken 110151559Salc socow_stats.attempted++; 111113267Salc vmspace = curproc->p_vmspace; 11298849Sken map = &vmspace->vm_map; 11398849Sken uva = (vm_offset_t) uio->uio_iov->iov_base; 114147009Sgallatin offset = uva & PAGE_MASK; 115216699Salc len = PAGE_SIZE - offset; 11698849Sken 117151559Salc /* 118151559Salc * Verify that access to the given address is allowed from user-space. 119151559Salc */ 120216702Salc if (vm_fault_quick_hold_pages(map, uva, len, VM_PROT_READ, &pp, 1) < 121216699Salc 0) { 12298849Sken socow_stats.fail_not_mapped++; 12398849Sken return(0); 12498849Sken } 12598849Sken 12698849Sken /* 12798849Sken * set up COW 12898849Sken */ 129207410Skmacy vm_page_lock(pp); 130186719Skib if (vm_page_cowsetup(pp) != 0) { 131186719Skib vm_page_unhold(pp); 132207410Skmacy vm_page_unlock(pp); 133186719Skib return (0); 134186719Skib } 13598849Sken 13698849Sken /* 13798849Sken * wire the page for I/O 13898849Sken */ 13998849Sken vm_page_wire(pp); 140151579Salc vm_page_unhold(pp); 141207410Skmacy vm_page_unlock(pp); 142112382Sgallatin /* 143112382Sgallatin * Allocate an sf buf 144112382Sgallatin */ 145137372Salc sf = sf_buf_alloc(pp, SFB_CATCH); 146207708Salc if (sf == NULL) { 147207548Salc vm_page_lock(pp); 148124639Sgallatin vm_page_cowclear(pp); 149124639Sgallatin vm_page_unwire(pp, 0); 150124639Sgallatin /* 151124639Sgallatin * Check for the object going away on us. This can 152124639Sgallatin * happen since we don't hold a reference to it. 153124639Sgallatin * If so, we're responsible for freeing the page. 154124639Sgallatin */ 155124639Sgallatin if (pp->wire_count == 0 && pp->object == NULL) 156124639Sgallatin vm_page_free(pp); 157207548Salc vm_page_unlock(pp); 158124639Sgallatin socow_stats.fail_sf_buf++; 159124639Sgallatin return(0); 160124639Sgallatin } 16198849Sken /* 16298849Sken * attach to mbuf 16398849Sken */ 164175872Sphk MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone, 165175872Sphk (void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF); 166216699Salc m0->m_len = len; 167147009Sgallatin m0->m_data = (caddr_t)sf_buf_kva(sf) + offset; 16898849Sken socow_stats.success++; 16998849Sken 17098849Sken iov = uio->uio_iov; 171147009Sgallatin iov->iov_base = (char *)iov->iov_base + m0->m_len; 172147009Sgallatin iov->iov_len -= m0->m_len; 173147009Sgallatin uio->uio_resid -= m0->m_len; 174147009Sgallatin uio->uio_offset += m0->m_len; 17598849Sken if (iov->iov_len == 0) { 17698849Sken uio->uio_iov++; 17798849Sken uio->uio_iovcnt--; 17898849Sken } 17998849Sken 180147009Sgallatin return(m0->m_len); 18198849Sken} 182