uipc_cow.c revision 137372
1/*-
2 * Copyright (c) 1997, Duke University
3 * All rights reserved.
4 *
5 * Author:
6 *         Andrew Gallatin <gallatin@cs.duke.edu>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of Duke University may not be used to endorse or promote
17 *    products derived from this software without specific prior written
18 *    permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY DUKE UNIVERSITY ``AS IS'' AND ANY
21 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DUKE UNIVERSITY BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
28 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * This is a set of routines for enabling and disabling copy on write
35 * protection for data written into sockets.
36 */
37
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD: head/sys/kern/uipc_cow.c 137372 2004-11-08 00:43:46Z alc $");
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/kernel.h>
44#include <sys/proc.h>
45#include <sys/lock.h>
46#include <sys/mutex.h>
47#include <sys/mbuf.h>
48#include <sys/sf_buf.h>
49#include <sys/socketvar.h>
50#include <sys/uio.h>
51
52#include <vm/vm.h>
53#include <vm/vm_param.h>
54#include <vm/pmap.h>
55#include <vm/vm_map.h>
56#include <vm/vm_page.h>
57#include <vm/vm_object.h>
58
59
/*
 * Event counters for the socket copy-on-write send path.  A single
 * file-local instance, socow_stats, is bumped by socow_setup() and
 * socow_iodone(); nothing in this file reads the counters back.
 */
static struct netsend_cow_stats {
	int attempted;		/* calls to socow_setup() */
	int fail_not_mapped;	/* pmap_extract() returned 0 for the address */
	int fail_sf_buf;	/* sf_buf_alloc() returned NULL */
	int success;		/* page attached to the mbuf */
	int iodone;		/* completions handled by socow_iodone() */
} socow_stats;
69
70static void socow_iodone(void *addr, void *args);
71
72static void
73socow_iodone(void *addr, void *args)
74{
75	struct sf_buf *sf;
76	vm_page_t pp;
77
78	sf = args;
79	pp = sf_buf_page(sf);
80	sf_buf_free(sf);
81	/* remove COW mapping  */
82	vm_page_lock_queues();
83	vm_page_cowclear(pp);
84	vm_page_unwire(pp, 0);
85	/*
86	 * Check for the object going away on us. This can
87	 * happen since we don't hold a reference to it.
88	 * If so, we're responsible for freeing the page.
89	 */
90	if (pp->wire_count == 0 && pp->object == NULL)
91		vm_page_free(pp);
92	vm_page_unlock_queues();
93	socow_stats.iodone++;
94}
95
96int
97socow_setup(struct mbuf *m0, struct uio *uio)
98{
99	struct sf_buf *sf;
100	vm_page_t pp;
101	vm_paddr_t pa;
102	struct iovec *iov;
103	struct vmspace *vmspace;
104	struct vm_map *map;
105	vm_offset_t uva;
106	int s;
107
108	vmspace = curproc->p_vmspace;
109	map = &vmspace->vm_map;
110	uva = (vm_offset_t) uio->uio_iov->iov_base;
111
112	s = splvm();
113
114       /*
115	* verify page is mapped & not already wired for i/o
116	*/
117	socow_stats.attempted++;
118	pa=pmap_extract(map->pmap, uva);
119	if(!pa) {
120		socow_stats.fail_not_mapped++;
121		splx(s);
122		return(0);
123	}
124	pp = PHYS_TO_VM_PAGE(pa);
125
126	/*
127	 * set up COW
128	 */
129	vm_page_lock_queues();
130	vm_page_cowsetup(pp);
131
132	/*
133	 * wire the page for I/O
134	 */
135	vm_page_wire(pp);
136	vm_page_unlock_queues();
137
138	/*
139	 * Allocate an sf buf
140	 */
141	sf = sf_buf_alloc(pp, SFB_CATCH);
142	if (!sf) {
143		vm_page_lock_queues();
144		vm_page_cowclear(pp);
145		vm_page_unwire(pp, 0);
146		/*
147		 * Check for the object going away on us. This can
148		 * happen since we don't hold a reference to it.
149		 * If so, we're responsible for freeing the page.
150		 */
151		if (pp->wire_count == 0 && pp->object == NULL)
152			vm_page_free(pp);
153		vm_page_unlock_queues();
154		socow_stats.fail_sf_buf++;
155		splx(s);
156		return(0);
157	}
158	/*
159	 * attach to mbuf
160	 */
161	m0->m_data = (caddr_t)sf_buf_kva(sf);
162	m0->m_len = PAGE_SIZE;
163	MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone, sf, M_RDONLY,
164	    EXT_SFBUF);
165	socow_stats.success++;
166
167	iov = uio->uio_iov;
168	iov->iov_base = (char *)iov->iov_base + PAGE_SIZE;
169	iov->iov_len -= PAGE_SIZE;
170	uio->uio_resid -= PAGE_SIZE;
171	uio->uio_offset += PAGE_SIZE;
172	if (iov->iov_len == 0) {
173		uio->uio_iov++;
174		uio->uio_iovcnt--;
175	}
176
177	splx(s);
178	return(1);
179}
180