uipc_cow.c revision 186719
1/*-- 2 * Copyright (c) 1997, Duke University 3 * All rights reserved. 4 * 5 * Author: 6 * Andrew Gallatin <gallatin@cs.duke.edu> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of Duke University may not be used to endorse or promote 17 * products derived from this software without specific prior written 18 * permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY DUKE UNIVERSITY ``AS IS'' AND ANY 21 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DUKE UNIVERSITY BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITSOR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 28 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 30 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33/* 34 * This is a set of routines for enabling and disabling copy on write 35 * protection for data written into sockets. 36 */ 37 38#include <sys/cdefs.h> 39__FBSDID("$FreeBSD: head/sys/kern/uipc_cow.c 186719 2009-01-03 13:24:08Z kib $"); 40 41#include <sys/param.h> 42#include <sys/systm.h> 43#include <sys/kernel.h> 44#include <sys/proc.h> 45#include <sys/lock.h> 46#include <sys/mutex.h> 47#include <sys/mbuf.h> 48#include <sys/sf_buf.h> 49#include <sys/socketvar.h> 50#include <sys/uio.h> 51 52#include <vm/vm.h> 53#include <vm/vm_extern.h> 54#include <vm/vm_param.h> 55#include <vm/pmap.h> 56#include <vm/vm_map.h> 57#include <vm/vm_page.h> 58#include <vm/vm_object.h> 59 60 61struct netsend_cow_stats { 62 int attempted; 63 int fail_not_mapped; 64 int fail_sf_buf; 65 int success; 66 int iodone; 67}; 68 69static struct netsend_cow_stats socow_stats; 70 71static void socow_iodone(void *addr, void *args); 72 73static void 74socow_iodone(void *addr, void *args) 75{ 76 struct sf_buf *sf; 77 vm_page_t pp; 78 79 sf = args; 80 pp = sf_buf_page(sf); 81 sf_buf_free(sf); 82 /* remove COW mapping */ 83 vm_page_lock_queues(); 84 vm_page_cowclear(pp); 85 vm_page_unwire(pp, 0); 86 /* 87 * Check for the object going away on us. This can 88 * happen since we don't hold a reference to it. 89 * If so, we're responsible for freeing the page. 90 */ 91 if (pp->wire_count == 0 && pp->object == NULL) 92 vm_page_free(pp); 93 vm_page_unlock_queues(); 94 socow_stats.iodone++; 95} 96 97int 98socow_setup(struct mbuf *m0, struct uio *uio) 99{ 100 struct sf_buf *sf; 101 vm_page_t pp; 102 struct iovec *iov; 103 struct vmspace *vmspace; 104 struct vm_map *map; 105 vm_offset_t offset, uva; 106 107 socow_stats.attempted++; 108 vmspace = curproc->p_vmspace; 109 map = &vmspace->vm_map; 110 uva = (vm_offset_t) uio->uio_iov->iov_base; 111 offset = uva & PAGE_MASK; 112 113 /* 114 * Verify that access to the given address is allowed from user-space. 115 */ 116 if (vm_fault_quick((caddr_t)uva, VM_PROT_READ) < 0) 117 return (0); 118 119 /* 120 * verify page is mapped & not already wired for i/o 121 */ 122 pp = pmap_extract_and_hold(map->pmap, uva, VM_PROT_READ); 123 if (pp == NULL) { 124 socow_stats.fail_not_mapped++; 125 return(0); 126 } 127 128 /* 129 * set up COW 130 */ 131 vm_page_lock_queues(); 132 if (vm_page_cowsetup(pp) != 0) { 133 vm_page_unhold(pp); 134 vm_page_unlock_queues(); 135 return (0); 136 } 137 138 /* 139 * wire the page for I/O 140 */ 141 vm_page_wire(pp); 142 vm_page_unhold(pp); 143 vm_page_unlock_queues(); 144 145 /* 146 * Allocate an sf buf 147 */ 148 sf = sf_buf_alloc(pp, SFB_CATCH); 149 if (!sf) { 150 vm_page_lock_queues(); 151 vm_page_cowclear(pp); 152 vm_page_unwire(pp, 0); 153 /* 154 * Check for the object going away on us. This can 155 * happen since we don't hold a reference to it. 156 * If so, we're responsible for freeing the page. 157 */ 158 if (pp->wire_count == 0 && pp->object == NULL) 159 vm_page_free(pp); 160 vm_page_unlock_queues(); 161 socow_stats.fail_sf_buf++; 162 return(0); 163 } 164 /* 165 * attach to mbuf 166 */ 167 MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone, 168 (void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF); 169 m0->m_len = PAGE_SIZE - offset; 170 m0->m_data = (caddr_t)sf_buf_kva(sf) + offset; 171 socow_stats.success++; 172 173 iov = uio->uio_iov; 174 iov->iov_base = (char *)iov->iov_base + m0->m_len; 175 iov->iov_len -= m0->m_len; 176 uio->uio_resid -= m0->m_len; 177 uio->uio_offset += m0->m_len; 178 if (iov->iov_len == 0) { 179 uio->uio_iov++; 180 uio->uio_iovcnt--; 181 } 182 183 return(m0->m_len); 184} 185