spx_reass.c revision 192753
1/*- 2 * Copyright (c) 1984, 1985, 1986, 1987, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2004-2009 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * Copyright (c) 1995, Mike Mitchell 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. 
Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the University of 45 * California, Berkeley and its contributors. 46 * 4. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 * 62 * @(#)spx_usrreq.h 63 */ 64 65#include <sys/cdefs.h> 66__FBSDID("$FreeBSD: head/sys/netipx/spx_reass.c 192753 2009-05-25 10:25:41Z rwatson $"); 67 68#include <sys/param.h> 69#include <sys/lock.h> 70#include <sys/malloc.h> 71#include <sys/mbuf.h> 72#include <sys/mutex.h> 73#include <sys/proc.h> 74#include <sys/protosw.h> 75#include <sys/signalvar.h> 76#include <sys/socket.h> 77#include <sys/socketvar.h> 78#include <sys/sx.h> 79#include <sys/systm.h> 80 81#include <net/route.h> 82#include <netinet/tcp_fsm.h> 83 84#include <netipx/ipx.h> 85#include <netipx/ipx_pcb.h> 86#include <netipx/ipx_var.h> 87#include <netipx/spx.h> 88#include <netipx/spx_debug.h> 89#include <netipx/spx_timer.h> 90#include <netipx/spx_var.h> 91 92static int spx_use_delack = 0; 93static int spxrexmtthresh = 3; 94 95static __inline void 96spx_insque(struct spx_q *element, struct spx_q *head) 97{ 98 99 element->si_next = head->si_next; 100 element->si_prev = head; 101 head->si_next = element; 102 element->si_next->si_prev = element; 103} 104 105void 106spx_remque(struct spx_q *element) 107{ 108 109 element->si_next->si_prev = element->si_prev; 110 element->si_prev->si_next = element->si_next; 111 element->si_prev = NULL; 112} 113 114/* 115 * Flesh pending queued segments on SPX close. 116 */ 117void 118spx_reass_flush(struct spxpcb *cb) 119{ 120 struct spx_q *s; 121 struct mbuf *m; 122 123 s = cb->s_q.si_next; 124 while (s != &(cb->s_q)) { 125 s = s->si_next; 126 spx_remque(s); 127 m = dtom(s); 128 m_freem(m); 129 } 130} 131 132/* 133 * Initialize SPX segment reassembly queue on SPX socket open. 134 */ 135void 136spx_reass_init(struct spxpcb *cb) 137{ 138 139 cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q; 140} 141 142/* 143 * This is structurally similar to the tcp reassembly routine but its 144 * function is somewhat different: it merely queues packets up, and 145 * suppresses duplicates. 
 *
 * cb is the SPX control block; si is the header of the received segment, or
 * SI(0) to skip all header processing and only run the delivery loop at
 * "present".
 *
 * Returns 1 on every path that leaves before the delivery loop (forced
 * retransmit on duplicate acks, window probe, segment beyond the allocation,
 * system packet, duplicate segment).  Returns 0 once the delivery loop has
 * run.  What the caller does with si in either case is not visible here --
 * presumably it disposes of the segment when 1 is returned; confirm against
 * the caller.
 */
int
spx_reass(struct spxpcb *cb, struct spx *si)
{
	struct spx_q *q;
	struct mbuf *m;
	struct socket *so = cb->s_ipxpcb->ipxp_socket;
	/* Non-zero when SF_HI is set: headers are kept on delivered data. */
	char packetp = cb->s_flags & SF_HI;
	int incr;
	/* Set once at least one segment has been appended to so_rcv. */
	char wakeup = 0;

	IPX_LOCK_ASSERT(cb->s_ipxpcb);

	if (si == SI(0))
		goto present;

	/*
	 * Update our news from them.
	 */
	if (si->si_cc & SPX_SA)
		cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
	if (SSEQ_GT(si->si_alo, cb->s_ralo))
		cb->s_flags |= SF_WIN;
	if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
		/* The ack does not advance past what was already recorded. */
		if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
			spxstat.spxs_rcvdupack++;

			/*
			 * If this is a completely duplicate ack and other
			 * conditions hold, we assume a packet has been
			 * dropped and retransmit it exactly as in
			 * tcp_input().
			 */
			if (si->si_ack != cb->s_rack ||
			    si->si_alo != cb->s_ralo)
				cb->s_dupacks = 0;
			else if (++cb->s_dupacks == spxrexmtthresh) {
				/* Saved so they can be restored below. */
				u_short onxt = cb->s_snxt;
				int cwnd = cb->s_cwnd;

				/*
				 * Rewind s_snxt to the acked sequence and
				 * shrink the window to one unit for the
				 * forced output.
				 */
				cb->s_snxt = si->si_ack;
				cb->s_cwnd = CUNIT;
				cb->s_force = 1 + SPXT_REXMT;
				spx_output(cb, NULL);
				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
				cb->s_rtt = 0;
				/* Halve the old window if it was large enough. */
				if (cwnd >= 4 * CUNIT)
					cb->s_cwnd = cwnd / 2;
				/* Never leave s_snxt behind its old value. */
				if (SSEQ_GT(onxt, cb->s_snxt))
					cb->s_snxt = onxt;
				return (1);
			}
		} else
			cb->s_dupacks = 0;
		goto update_window;
	}
	cb->s_dupacks = 0;

	/*
	 * If our correspondent acknowledges data we haven't sent TCP would
	 * drop the packet after acking.  We'll be a little more permissive:
	 * the ack is clamped to s_smax + 1 and processing continues.
	 */
	if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
		spxstat.spxs_rcvacktoomuch++;
		si->si_ack = cb->s_smax + 1;
	}
	spxstat.spxs_rcvackpack++;

	/*
	 * If transmit timer is running and timed sequence number was acked,
	 * update smoothed round trip time.  See discussion of algorithm in
	 * tcp_input.c
	 */
	if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
		spxstat.spxs_rttupdated++;
		if (cb->s_srtt != 0) {
			short delta;
			/* Both estimators are clamped to a minimum of 1. */
			delta = cb->s_rtt - (cb->s_srtt >> 3);
			if ((cb->s_srtt += delta) <= 0)
				cb->s_srtt = 1;
			if (delta < 0)
				delta = -delta;
			delta -= (cb->s_rttvar >> 2);
			if ((cb->s_rttvar += delta) <= 0)
				cb->s_rttvar = 1;
		} else {
			/*
			 * No rtt measurement yet: seed both estimators from
			 * this sample.
			 */
			cb->s_srtt = cb->s_rtt << 3;
			cb->s_rttvar = cb->s_rtt << 1;
		}
		cb->s_rtt = 0;
		cb->s_rxtshift = 0;
		SPXT_RANGESET(cb->s_rxtcur,
		    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
		    SPXTV_MIN, SPXTV_REXMTMAX);
	}

	/*
	 * If all outstanding data is acked, stop retransmit timer and
	 * remember to restart (more output or persist).  If there is more
	 * data to be acked, restart retransmit timer, using current
	 * (possibly backed-off) value;
	 */
	if (si->si_ack == cb->s_smax + 1) {
		cb->s_timer[SPXT_REXMT] = 0;
		cb->s_flags |= SF_RXT;
	} else if (cb->s_timer[SPXT_PERSIST] == 0)
		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;

	/*
	 * When new data is acked, open the congestion window.  If the window
	 * gives us less than ssthresh packets in flight, open exponentially
	 * (maxseg at a time).  Otherwise open linearly (maxseg^2 / cwnd at a
	 * time).  The result is capped at s_cwmx.
	 */
	incr = CUNIT;
	if (cb->s_cwnd > cb->s_ssthresh)
		incr = max(incr * incr / cb->s_cwnd, 1);
	cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);

	/*
	 * Trim Acked data from output queue: drop records from the head of
	 * so_snd while their sequence number is below the ack.
	 */
	SOCKBUF_LOCK(&so->so_snd);
	while ((m = so->so_snd.sb_mb) != NULL) {
		if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
			sbdroprecord_locked(&so->so_snd);
		else
			break;
	}
	/*
	 * No explicit SOCKBUF_UNLOCK follows; presumably sowwakeup_locked()
	 * releases the so_snd lock taken above -- confirm.
	 */
	sowwakeup_locked(so);
	cb->s_rack = si->si_ack;
update_window:
	if (SSEQ_LT(cb->s_snxt, cb->s_rack))
		cb->s_snxt = cb->s_rack;
	/*
	 * NOTE(review): as parenthesized, the last clause
	 * (s_swl2 == si_ack && s_ralo < si_alo) is tested independently of
	 * s_swl1 == si_seq.  tcp_input()'s analogous window-update test is
	 * believed to nest that clause under the sequence-equality check --
	 * confirm that this grouping is intended.
	 */
	if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
	    (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
	    (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
		/* keep track of pure window updates */
		if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
		    && SSEQ_LT(cb->s_ralo, si->si_alo)) {
			spxstat.spxs_rcvwinupd++;
			spxstat.spxs_rcvdupack--;
		}
		/* Record the peer's allocation and the seq/ack it came with. */
		cb->s_ralo = si->si_alo;
		cb->s_swl1 = si->si_seq;
		cb->s_swl2 = si->si_ack;
		cb->s_swnd = (1 + si->si_alo - si->si_ack);
		if (cb->s_swnd > cb->s_smxw)
			cb->s_smxw = cb->s_swnd;
		cb->s_flags |= SF_WIN;
	}

	/*
	 * If this packet number is higher than that which we have allocated
	 * refuse it, unless urgent.
	 */
	if (SSEQ_GT(si->si_seq, cb->s_alo)) {
		if (si->si_cc & SPX_SP) {
			spxstat.spxs_rcvwinprobe++;
			return (1);
		} else
			spxstat.spxs_rcvpackafterwin++;
		if (si->si_cc & SPX_OB) {
			/* Urgent data is accepted up to 60 past s_alo. */
			if (SSEQ_GT(si->si_seq, cb->s_alo + 60))
				return (1); /* else queue this packet; */
		} else {
#ifdef BROKEN
			/*
			 * XXXRW: This is broken on at least one count:
			 * spx_close() will free the ipxp and related parts,
			 * which are then touched by spx_input() after the
			 * return from spx_reass().
			 */
			/*struct socket *so = cb->s_ipxpcb->ipxp_socket;
			if (so->so_state && SS_NOFDREF) {
				spx_close(cb);
			} else
				would crash system*/
#endif
			spx_istat.notyet++;
			return (1);
		}
	}

	/*
	 * If this is a system packet, we don't need to queue it up, and
	 * won't update acknowledge #.
	 */
	if (si->si_cc & SPX_SP)
		return (1);

	/*
	 * We have already seen this packet, so drop.
	 */
	if (SSEQ_LT(si->si_seq, cb->s_ack)) {
		spx_istat.bdreas++;
		spxstat.spxs_rcvduppack++;
		if (si->si_seq == cb->s_ack - 1)
			spx_istat.lstdup++;
		return (1);
	}

	/*
	 * Loop through all packets queued up to insert in appropriate
	 * sequence.  The scan stops at the first queued segment with a
	 * higher sequence number (or at the head if there is none); a
	 * segment already on the queue is counted as a duplicate and
	 * dropped.
	 */
	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
		if (si->si_seq == SI(q)->si_seq) {
			spxstat.spxs_rcvduppack++;
			return (1);
		}
		if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
			spxstat.spxs_rcvoopack++;
			break;
		}
	}
	/* Link the new segment in just before q. */
	spx_insque((struct spx_q *)si, q->si_prev);

	/*
	 * If this packet is urgent, inform process
	 */
	if (si->si_cc & SPX_OB) {
		cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
		sohasoutofband(so);
		cb->s_oobflags |= SF_IOOB;
	}
present:
#define	SPINC sizeof(struct spxhdr)
	SOCKBUF_LOCK(&so->so_rcv);

	/*
	 * Loop through all packets queued up to update acknowledge number,
	 * and present all acknowledged data to user; if in packet interface
	 * mode, show packet headers.  The loop stops at the first queued
	 * segment whose sequence number is not the next expected one.
	 */
	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
		if (SI(q)->si_seq == cb->s_ack) {
			cb->s_ack++;
			m = dtom(q);
			if (SI(q)->si_cc & SPX_OB) {
				cb->s_oobflags &= ~SF_IOOB;
				if (so->so_rcv.sb_cc)
					so->so_oobmark = so->so_rcv.sb_cc;
				else
					so->so_rcv.sb_state |= SBS_RCVATMARK;
			}
			/*
			 * Step back to the predecessor before unlinking, so
			 * the loop increment lands on the element that
			 * followed the one being removed.
			 */
			q = q->si_prev;
			spx_remque(q->si_next);
			wakeup = 1;
			spxstat.spxs_rcvpack++;
#ifdef SF_NEWCALL
			if (cb->s_flags2 & SF_NEWCALL) {
				struct spxhdr *sp = mtod(m, struct spxhdr *);
				u_char dt = sp->spx_dt;
				spx_newchecks[4]++;
				/*
				 * When the datastream type changes, queue a
				 * small MT_CONTROL mbuf carrying the new
				 * type ahead of the data.
				 */
				if (dt != cb->s_rhdr.spx_dt) {
					struct mbuf *mm =
					   m_getclr(M_DONTWAIT, MT_CONTROL);
					spx_newchecks[0]++;
					if (mm != NULL) {
						u_short *s =
							mtod(mm, u_short *);
						cb->s_rhdr.spx_dt = dt;
						mm->m_len = 5; /*XXX*/
						s[0] = 5;
						s[1] = 1;
						*(u_char *)(&s[2]) = dt;
						sbappend_locked(&so->so_rcv, mm);
					}
				}
				if (sp->spx_cc & SPX_OB) {
					MCHTYPE(m, MT_OOBDATA);
					spx_newchecks[1]++;
					so->so_oobmark = 0;
					so->so_rcv.sb_state &= ~SBS_RCVATMARK;
				}
				/* Strip the SPX header unless in packet mode. */
				if (packetp == 0) {
					m->m_data += SPINC;
					m->m_len -= SPINC;
					m->m_pkthdr.len -= SPINC;
				}
				if ((sp->spx_cc & SPX_EM) || packetp) {
					sbappendrecord_locked(&so->so_rcv, m);
					spx_newchecks[9]++;
				} else
					sbappend_locked(&so->so_rcv, m);
			} else
#endif
			if (packetp)
				sbappendrecord_locked(&so->so_rcv, m);
			else {
				/*
				 * Save a copy of the header, then strip it
				 * from the mbuf before appending the data.
				 */
				cb->s_rhdr = *mtod(m, struct spxhdr *);
				m->m_data += SPINC;
				m->m_len -= SPINC;
				m->m_pkthdr.len -= SPINC;
				sbappend_locked(&so->so_rcv, m);
			}
		} else
			break;
	}
	/*
	 * Exactly one of these runs; presumably sorwakeup_locked() releases
	 * the so_rcv lock itself, as no unlock follows it -- confirm.
	 */
	if (wakeup)
		sorwakeup_locked(so);
	else
		SOCKBUF_UNLOCK(&so->so_rcv);
	return (0);
}