Deleted Added
full compact
uipc_socket.c (255138) uipc_socket.c (255608)
1/*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California.
4 * Copyright (c) 2004 The FreeBSD Foundation
5 * Copyright (c) 2004-2008 Robert N. M. Watson
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without

--- 87 unchanged lines hidden (view full) ---

96 * NOTE: With regard to VNETs the general rule is that callers do not set
97 * curvnet. Exceptions to this rule include soabort(), sodisconnect(),
98 * sofree() (and with that sorele(), sotryfree()), as well as sonewconn()
99 * and sorflush(), which are usually called from a pre-set VNET context.
100 * sopoll() currently does not need a VNET context to be set.
101 */
102
103#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993
3 * The Regents of the University of California.
4 * Copyright (c) 2004 The FreeBSD Foundation
5 * Copyright (c) 2004-2008 Robert N. M. Watson
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without

--- 87 unchanged lines hidden (view full) ---

96 * NOTE: With regard to VNETs the general rule is that callers do not set
97 * curvnet. Exceptions to this rule include soabort(), sodisconnect(),
98 * sofree() (and with that sorele(), sotryfree()), as well as sonewconn()
99 * and sorflush(), which are usually called from a pre-set VNET context.
100 * sopoll() currently does not need a VNET context to be set.
101 */
102
103#include <sys/cdefs.h>
104__FBSDID("$FreeBSD: head/sys/kern/uipc_socket.c 255138 2013-09-01 23:34:53Z davide $");
104__FBSDID("$FreeBSD: head/sys/kern/uipc_socket.c 255608 2013-09-16 06:25:54Z kib $");
105
106#include "opt_inet.h"
107#include "opt_inet6.h"
105
106#include "opt_inet.h"
107#include "opt_inet6.h"
108#include "opt_zero.h"
109#include "opt_compat.h"
110
111#include <sys/param.h>
112#include <sys/systm.h>
113#include <sys/fcntl.h>
114#include <sys/limits.h>
115#include <sys/lock.h>
116#include <sys/mac.h>

--- 99 unchanged lines hidden (view full) ---

216 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP,
217 0, sizeof(int), sysctl_somaxconn, "I",
218 "Maximum listen socket pending connection accept queue size (compat)");
219
220static int numopensockets;
221SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
222 &numopensockets, 0, "Number of open sockets");
223
108#include "opt_compat.h"
109
110#include <sys/param.h>
111#include <sys/systm.h>
112#include <sys/fcntl.h>
113#include <sys/limits.h>
114#include <sys/lock.h>
115#include <sys/mac.h>

--- 99 unchanged lines hidden (view full) ---

215 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP,
216 0, sizeof(int), sysctl_somaxconn, "I",
217 "Maximum listen socket pending connection accept queue size (compat)");
218
219static int numopensockets;
220SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
221 &numopensockets, 0, "Number of open sockets");
222
/*
 * Zero-copy socket tunables.  This whole section is compiled only when at
 * least one of SOCKET_SEND_COW or SOCKET_RECV_PFLIP is defined; it declares
 * a read-only "zero_copy" sysctl node under _kern_ipc and, per option, one
 * integer knob beneath that node.
 */
224#if defined(SOCKET_SEND_COW) || defined(SOCKET_RECV_PFLIP)
225SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0,
226 "Zero copy controls");
227#ifdef SOCKET_RECV_PFLIP
/*
 * Receive-side switch, initialised to 1 and exported read-write as
 * "receive".  The receive paths in this file test it before choosing
 * uiomoveco() over uiomove().
 */
228int so_zero_copy_receive = 1;
229SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW,
230 &so_zero_copy_receive, 0, "Enable zero copy receive");
231#endif
232#ifdef SOCKET_SEND_COW
/*
 * Send-side switch, initialised to 1 and exported read-write as "send".
 * sosend_copyin() tests it before attempting socow_setup().
 */
233int so_zero_copy_send = 1;
234SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW,
235 &so_zero_copy_send, 0, "Enable zero copy send");
236#endif /* SOCKET_SEND_COW */
237#endif /* SOCKET_SEND_COW || SOCKET_RECV_PFLIP */
238
239/*
240 * accept_mtx locks down per-socket fields relating to accept queues. See
241 * socketvar.h for an annotation of the protected fields of struct socket.
242 */
243struct mtx accept_mtx;
244MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF);
245
246/*

--- 726 unchanged lines hidden (view full) ---

973 return (ENOTCONN);
974 if (so->so_state & SS_ISDISCONNECTING)
975 return (EALREADY);
976 VNET_SO_ASSERT(so);
977 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
978 return (error);
979}
980
223/*
224 * accept_mtx locks down per-socket fields relating to accept queues. See
225 * socketvar.h for an annotation of the protected fields of struct socket.
226 */
227struct mtx accept_mtx;
228MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF);
229
230/*

--- 726 unchanged lines hidden (view full) ---

957 return (ENOTCONN);
958 if (so->so_state & SS_ISDISCONNECTING)
959 return (EALREADY);
960 VNET_SO_ASSERT(so);
961 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
962 return (error);
963}
964
981#ifdef SOCKET_SEND_COW
/*
 * Counters kept for the copy-on-write send path.  sosend_copyin() bumps
 * size_ok and align_ok each time its size checks pass and it is about to
 * call socow_setup(); found_ifp is not touched by the code visible in this
 * part of the file.
 */
struct so_zerocopy_stats {
	int	size_ok;	/* incremented when the size checks pass */
	int	align_ok;	/* incremented together with size_ok */
	int	found_ifp;	/* NOTE(review): no visible writer here */
};

/* Single global instance; every counter starts at zero. */
struct so_zerocopy_stats so_zerocp_stats = {
	.size_ok = 0,
	.align_ok = 0,
	.found_ifp = 0,
};
988
989/*
990 * sosend_copyin() is only used if zero copy sockets are enabled. Otherwise
991 * sosend_dgram() and sosend_generic() use m_uiotombuf().
992 *
993 * sosend_copyin() accepts a uio and prepares an mbuf chain holding part or
994 * all of the data referenced by the uio. If desired, it uses zero-copy.
995 * *space will be updated to reflect data copied in.
996 *
997 * NB: If atomic I/O is requested, the caller must already have checked that
998 * space can hold resid bytes.
999 *
1000 * NB: In the event of an error, the caller may need to free the partial
1001 * chain pointed to by *mpp. The contents of both *uio and *space may be
1002 * modified even in the case of an error.
1003 */
1004static int
1005sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
1006 int flags)
1007{
1008 struct mbuf *m, **mp, *top;
1009 long len;
1010 ssize_t resid;
1011 int error;
1012 int cow_send;
1013
1014 *retmp = top = NULL;
1015 mp = &top;
1016 len = 0;
1017 resid = uio->uio_resid;
1018 error = 0;
1019 do {
1020 cow_send = 0;
1021 if (resid >= MINCLSIZE) {
1022 if (top == NULL) {
1023 m = m_gethdr(M_WAITOK, MT_DATA);
1024 m->m_pkthdr.len = 0;
1025 m->m_pkthdr.rcvif = NULL;
1026 } else
1027 m = m_get(M_WAITOK, MT_DATA);
1028 if (so_zero_copy_send &&
1029 resid >= PAGE_SIZE &&
1030 *space >= PAGE_SIZE &&
1031 uio->uio_iov->iov_len >= PAGE_SIZE) {
1032 so_zerocp_stats.size_ok++;
1033 so_zerocp_stats.align_ok++;
1034 cow_send = socow_setup(m, uio);
1035 len = cow_send;
1036 }
1037 if (!cow_send) {
1038 m_clget(m, M_WAITOK);
1039 len = min(min(MCLBYTES, resid), *space);
1040 }
1041 } else {
1042 if (top == NULL) {
1043 m = m_gethdr(M_WAITOK, MT_DATA);
1044 m->m_pkthdr.len = 0;
1045 m->m_pkthdr.rcvif = NULL;
1046
1047 len = min(min(MHLEN, resid), *space);
1048 /*
1049 * For datagram protocols, leave room
1050 * for protocol headers in first mbuf.
1051 */
1052 if (atomic && m && len < MHLEN)
1053 MH_ALIGN(m, len);
1054 } else {
1055 m = m_get(M_WAITOK, MT_DATA);
1056 len = min(min(MLEN, resid), *space);
1057 }
1058 }
1059 if (m == NULL) {
1060 error = ENOBUFS;
1061 goto out;
1062 }
1063
1064 *space -= len;
1065 if (cow_send)
1066 error = 0;
1067 else
1068 error = uiomove(mtod(m, void *), (int)len, uio);
1069 resid = uio->uio_resid;
1070 m->m_len = len;
1071 *mp = m;
1072 top->m_pkthdr.len += len;
1073 if (error)
1074 goto out;
1075 mp = &m->m_next;
1076 if (resid <= 0) {
1077 if (flags & MSG_EOR)
1078 top->m_flags |= M_EOR;
1079 break;
1080 }
1081 } while (*space > 0 && atomic);
1082out:
1083 *retmp = top;
1084 return (error);
1085}
1086#endif /* SOCKET_SEND_COW */
1087
1088#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
1089
1090int
1091sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
1092 struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
1093{
1094 long space;
1095 ssize_t resid;
1096 int clen = 0, error, dontroute;
965#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
966
967int
968sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
969 struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
970{
971 long space;
972 ssize_t resid;
973 int clen = 0, error, dontroute;
1097#ifdef SOCKET_SEND_COW
1098 int atomic = sosendallatonce(so) || top;
1099#endif
1100
1101 KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM"));
1102 KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
1103 ("sosend_dgram: !PR_ATOMIC"));
1104
1105 if (uio != NULL)
1106 resid = uio->uio_resid;
1107 else

--- 66 unchanged lines hidden (view full) ---

1174 error = EMSGSIZE;
1175 goto out;
1176 }
1177 if (uio == NULL) {
1178 resid = 0;
1179 if (flags & MSG_EOR)
1180 top->m_flags |= M_EOR;
1181 } else {
974
975 KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM"));
976 KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
977 ("sosend_dgram: !PR_ATOMIC"));
978
979 if (uio != NULL)
980 resid = uio->uio_resid;
981 else

--- 66 unchanged lines hidden (view full) ---

1048 error = EMSGSIZE;
1049 goto out;
1050 }
1051 if (uio == NULL) {
1052 resid = 0;
1053 if (flags & MSG_EOR)
1054 top->m_flags |= M_EOR;
1055 } else {
1182#ifdef SOCKET_SEND_COW
1183 error = sosend_copyin(uio, &top, atomic, &space, flags);
1184 if (error)
1185 goto out;
1186#else
1187 /*
1188 * Copy the data from userland into a mbuf chain.
1189 * If no data is to be copied in, a single empty mbuf
1190 * is returned.
1191 */
1192 top = m_uiotombuf(uio, M_WAITOK, space, max_hdr,
1193 (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0)));
1194 if (top == NULL) {
1195 error = EFAULT; /* only possible error */
1196 goto out;
1197 }
1198 space -= resid - uio->uio_resid;
1056 /*
1057 * Copy the data from userland into a mbuf chain.
1058 * If no data is to be copied in, a single empty mbuf
1059 * is returned.
1060 */
1061 top = m_uiotombuf(uio, M_WAITOK, space, max_hdr,
1062 (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0)));
1063 if (top == NULL) {
1064 error = EFAULT; /* only possible error */
1065 goto out;
1066 }
1067 space -= resid - uio->uio_resid;
1199#endif /* SOCKET_SEND_COW */
1200 resid = uio->uio_resid;
1201 }
1202 KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
1203 /*
1204 * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock
1205 * than with.
1206 */
1207 if (dontroute) {

--- 155 unchanged lines hidden (view full) ---

1363 SOCKBUF_UNLOCK(&so->so_snd);
1364 space -= clen;
1365 do {
1366 if (uio == NULL) {
1367 resid = 0;
1368 if (flags & MSG_EOR)
1369 top->m_flags |= M_EOR;
1370 } else {
1068 resid = uio->uio_resid;
1069 }
1070 KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
1071 /*
1072 * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock
1073 * than with.
1074 */
1075 if (dontroute) {

--- 155 unchanged lines hidden (view full) ---

1231 SOCKBUF_UNLOCK(&so->so_snd);
1232 space -= clen;
1233 do {
1234 if (uio == NULL) {
1235 resid = 0;
1236 if (flags & MSG_EOR)
1237 top->m_flags |= M_EOR;
1238 } else {
1371#ifdef SOCKET_SEND_COW
1372 error = sosend_copyin(uio, &top, atomic,
1373 &space, flags);
1374 if (error != 0)
1375 goto release;
1376#else
1377 /*
1378 * Copy the data from userland into a mbuf
1379 * chain. If no data is to be copied in,
1380 * a single empty mbuf is returned.
1381 */
1382 top = m_uiotombuf(uio, M_WAITOK, space,
1383 (atomic ? max_hdr : 0),
1384 (atomic ? M_PKTHDR : 0) |
1385 ((flags & MSG_EOR) ? M_EOR : 0));
1386 if (top == NULL) {
1387 error = EFAULT; /* only possible error */
1388 goto release;
1389 }
1390 space -= resid - uio->uio_resid;
1239 /*
1240 * Copy the data from userland into a mbuf
1241 * chain. If no data is to be copied in,
1242 * a single empty mbuf is returned.
1243 */
1244 top = m_uiotombuf(uio, M_WAITOK, space,
1245 (atomic ? max_hdr : 0),
1246 (atomic ? M_PKTHDR : 0) |
1247 ((flags & MSG_EOR) ? M_EOR : 0));
1248 if (top == NULL) {
1249 error = EFAULT; /* only possible error */
1250 goto release;
1251 }
1252 space -= resid - uio->uio_resid;
1391#endif /* SOCKET_SEND_COW */
1392 resid = uio->uio_resid;
1393 }
1394 if (dontroute) {
1395 SOCK_LOCK(so);
1396 so->so_options |= SO_DONTROUTE;
1397 SOCK_UNLOCK(so);
1398 }
1399 /*

--- 75 unchanged lines hidden (view full) ---

1475 KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
1476 VNET_SO_ASSERT(so);
1477
1478 m = m_get(M_WAITOK, MT_DATA);
1479 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1480 if (error)
1481 goto bad;
1482 do {
1253 resid = uio->uio_resid;
1254 }
1255 if (dontroute) {
1256 SOCK_LOCK(so);
1257 so->so_options |= SO_DONTROUTE;
1258 SOCK_UNLOCK(so);
1259 }
1260 /*

--- 75 unchanged lines hidden (view full) ---

1336 KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
1337 VNET_SO_ASSERT(so);
1338
1339 m = m_get(M_WAITOK, MT_DATA);
1340 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
1341 if (error)
1342 goto bad;
1343 do {
1483#ifdef SOCKET_RECV_PFLIP
1484 if (so_zero_copy_receive) {
1485 int disposable;
1486
1487 if ((m->m_flags & M_EXT)
1488 && (m->m_ext.ext_type == EXT_DISPOSABLE))
1489 disposable = 1;
1490 else
1491 disposable = 0;
1492
1493 error = uiomoveco(mtod(m, void *),
1494 min(uio->uio_resid, m->m_len), uio, disposable);
1495 } else
1496#endif /* SOCKET_RECV_PFLIP */
1497 error = uiomove(mtod(m, void *),
1498 (int) min(uio->uio_resid, m->m_len), uio);
1499 m = m_free(m);
1500 } while (uio->uio_resid && error == 0 && m);
1501bad:
1502 if (m != NULL)
1503 m_freem(m);
1504 return (error);

--- 306 unchanged lines hidden (view full) ---

1811 * record) when we drop priority; we must note any additions
1812 * to the sockbuf when we block interrupts again.
1813 */
1814 if (mp == NULL) {
1815 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1816 SBLASTRECORDCHK(&so->so_rcv);
1817 SBLASTMBUFCHK(&so->so_rcv);
1818 SOCKBUF_UNLOCK(&so->so_rcv);
1344 error = uiomove(mtod(m, void *),
1345 (int) min(uio->uio_resid, m->m_len), uio);
1346 m = m_free(m);
1347 } while (uio->uio_resid && error == 0 && m);
1348bad:
1349 if (m != NULL)
1350 m_freem(m);
1351 return (error);

--- 306 unchanged lines hidden (view full) ---

1658 * record) when we drop priority; we must note any additions
1659 * to the sockbuf when we block interrupts again.
1660 */
1661 if (mp == NULL) {
1662 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1663 SBLASTRECORDCHK(&so->so_rcv);
1664 SBLASTMBUFCHK(&so->so_rcv);
1665 SOCKBUF_UNLOCK(&so->so_rcv);
1819#ifdef SOCKET_RECV_PFLIP
1820 if (so_zero_copy_receive) {
1821 int disposable;
1822
1823 if ((m->m_flags & M_EXT)
1824 && (m->m_ext.ext_type == EXT_DISPOSABLE))
1825 disposable = 1;
1826 else
1827 disposable = 0;
1828
1829 error = uiomoveco(mtod(m, char *) + moff,
1830 (int)len, uio, disposable);
1831 } else
1832#endif /* SOCKET_RECV_PFLIP */
1833 error = uiomove(mtod(m, char *) + moff, (int)len, uio);
1834 SOCKBUF_LOCK(&so->so_rcv);
1835 if (error) {
1836 /*
1837 * The MT_SONAME mbuf has already been removed
1838 * from the record, so it is necessary to
1839 * remove the data mbufs, if any, to preserve
1840 * the invariant in the case of PR_ADDR that

--- 1912 unchanged lines hidden ---
1666 error = uiomove(mtod(m, char *) + moff, (int)len, uio);
1667 SOCKBUF_LOCK(&so->so_rcv);
1668 if (error) {
1669 /*
1670 * The MT_SONAME mbuf has already been removed
1671 * from the record, so it is necessary to
1672 * remove the data mbufs, if any, to preserve
1673 * the invariant in the case of PR_ADDR that

--- 1912 unchanged lines hidden ---