uipc_cow.c revision 207708
1/*--
2 * Copyright (c) 1997, Duke University
3 * All rights reserved.
4 *
5 * Author:
6 *         Andrew Gallatin <gallatin@cs.duke.edu>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of Duke University may not be used to endorse or promote
17 *    products derived from this software without specific prior written
18 *    permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY DUKE UNIVERSITY ``AS IS'' AND ANY
21 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DUKE UNIVERSITY BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
28 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * This is a set of routines for enabling and disabling copy on write
35 * protection for data written into sockets.
36 */
37
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD: head/sys/kern/uipc_cow.c 207708 2010-05-06 17:43:41Z alc $");
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/kernel.h>
44#include <sys/proc.h>
45#include <sys/lock.h>
46#include <sys/mutex.h>
47#include <sys/mbuf.h>
48#include <sys/sf_buf.h>
49#include <sys/socketvar.h>
50#include <sys/uio.h>
51
52#include <vm/vm.h>
53#include <vm/vm_extern.h>
54#include <vm/vm_param.h>
55#include <vm/pmap.h>
56#include <vm/vm_map.h>
57#include <vm/vm_page.h>
58#include <vm/vm_object.h>
59
60
/*
 * Event counters for the socket copy-on-write send path.  Each field is
 * incremented at a single point in socow_setup() or socow_iodone().
 * Failures of vm_fault_quick() and vm_page_cowsetup() in socow_setup()
 * have no counter of their own.
 */
struct netsend_cow_stats {
	int attempted;		/* calls to socow_setup() */
	int fail_not_mapped;	/* pmap_extract_and_hold() found no page */
	int fail_sf_buf;	/* sf_buf_alloc() returned NULL */
	int success;		/* page attached to the mbuf */
	int iodone;		/* socow_iodone() invocations */
};
68
69static struct netsend_cow_stats socow_stats;
70
71static void socow_iodone(void *addr, void *args);
72
73static void
74socow_iodone(void *addr, void *args)
75{
76	struct sf_buf *sf;
77	vm_page_t pp;
78
79	sf = args;
80	pp = sf_buf_page(sf);
81	sf_buf_free(sf);
82	/* remove COW mapping  */
83	vm_page_lock(pp);
84	vm_page_cowclear(pp);
85	vm_page_unwire(pp, 0);
86	/*
87	 * Check for the object going away on us. This can
88	 * happen since we don't hold a reference to it.
89	 * If so, we're responsible for freeing the page.
90	 */
91	if (pp->wire_count == 0 && pp->object == NULL)
92		vm_page_free(pp);
93	vm_page_unlock(pp);
94	socow_stats.iodone++;
95}
96
97int
98socow_setup(struct mbuf *m0, struct uio *uio)
99{
100	struct sf_buf *sf;
101	vm_page_t pp;
102	struct iovec *iov;
103	struct vmspace *vmspace;
104	struct vm_map *map;
105	vm_offset_t offset, uva;
106
107	socow_stats.attempted++;
108	vmspace = curproc->p_vmspace;
109	map = &vmspace->vm_map;
110	uva = (vm_offset_t) uio->uio_iov->iov_base;
111	offset = uva & PAGE_MASK;
112
113	/*
114	 * Verify that access to the given address is allowed from user-space.
115	 */
116	if (vm_fault_quick((caddr_t)uva, VM_PROT_READ) < 0)
117		return (0);
118
119       /*
120	* verify page is mapped & not already wired for i/o
121	*/
122	pp = pmap_extract_and_hold(map->pmap, uva, VM_PROT_READ);
123	if (pp == NULL) {
124		socow_stats.fail_not_mapped++;
125		return(0);
126	}
127
128	/*
129	 * set up COW
130	 */
131	vm_page_lock(pp);
132	if (vm_page_cowsetup(pp) != 0) {
133		vm_page_unhold(pp);
134		vm_page_unlock(pp);
135		return (0);
136	}
137
138	/*
139	 * wire the page for I/O
140	 */
141	vm_page_wire(pp);
142	vm_page_unhold(pp);
143	vm_page_unlock(pp);
144	/*
145	 * Allocate an sf buf
146	 */
147	sf = sf_buf_alloc(pp, SFB_CATCH);
148	if (sf == NULL) {
149		vm_page_lock(pp);
150		vm_page_cowclear(pp);
151		vm_page_unwire(pp, 0);
152		/*
153		 * Check for the object going away on us. This can
154		 * happen since we don't hold a reference to it.
155		 * If so, we're responsible for freeing the page.
156		 */
157		if (pp->wire_count == 0 && pp->object == NULL)
158			vm_page_free(pp);
159		vm_page_unlock(pp);
160		socow_stats.fail_sf_buf++;
161		return(0);
162	}
163	/*
164	 * attach to mbuf
165	 */
166	MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone,
167	    (void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF);
168	m0->m_len = PAGE_SIZE - offset;
169	m0->m_data = (caddr_t)sf_buf_kva(sf) + offset;
170	socow_stats.success++;
171
172	iov = uio->uio_iov;
173	iov->iov_base = (char *)iov->iov_base + m0->m_len;
174	iov->iov_len -= m0->m_len;
175	uio->uio_resid -= m0->m_len;
176	uio->uio_offset += m0->m_len;
177	if (iov->iov_len == 0) {
178		uio->uio_iov++;
179		uio->uio_iovcnt--;
180	}
181
182	return(m0->m_len);
183}
184