1/*
2 * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <netinet/in_systm.h>
31#include <sys/socket.h>
32#include <sys/socketvar.h>
33
34#include <net/route.h>
35#include <netinet/in.h>
36#include <net/if.h>
37
38#include <netinet/ip.h>
39#include <netinet/ip_var.h>
40#include <netinet/in_var.h>
41#include <netinet/tcp.h>
42#include <netinet/tcp_var.h>
43#include <netinet/tcp_seq.h>
44#include <netinet/tcpip.h>
45#include <netinet/tcp_fsm.h>
46#include <netinet/mptcp_var.h>
47#include <netinet/mptcp.h>
48#include <netinet/mptcp_opt.h>
49#include <netinet/mptcp_seq.h>
50
51#include <libkern/crypto/sha1.h>
52#include <netinet/mptcp_timer.h>
53
54#include <mach/sdt.h>
55
/*
 * Read/write integer sysctl "force_64bit_dsn", registered under the
 * _net_inet_mptcp node. It defaults to 0. When it is non-zero,
 * mptcp_setup_opts() behaves as if MPTCPF_SND_64BITDSN were set on the
 * connection and emits the DSS option carrying a 64-bit data sequence
 * number.
 */
int32_t force_64bit_dsn = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, force_64bit_dsn,
    CTLFLAG_RW|CTLFLAG_LOCKED, &force_64bit_dsn, 0,
    "Force MPTCP 64bit dsn");


/*
 * Forward declarations of file-local (static) helpers whose definitions
 * appear elsewhere in this file.
 */
static int mptcp_validate_join_hmac(struct tcpcb *, u_char*, int);
static int mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen);
67
68/*
69 * MPTCP Options Output Processing
70 */
71
72static unsigned
73mptcp_setup_first_subflow_syn_opts(struct socket *so, int flags, u_char *opt,
74    unsigned optlen)
75{
76	struct tcpcb *tp = sototcpcb(so);
77	struct mptcb *mp_tp = NULL;
78	mp_tp = tptomptp(tp);
79
80	if (!(so->so_flags & SOF_MP_SUBFLOW))
81		return (optlen);
82
83	/*
84	 * Avoid retransmitting the MP_CAPABLE option.
85	 */
86	if (tp->t_rxtshift > mptcp_mpcap_retries)
87		return (optlen);
88
89	if ((flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
90		struct mptcp_mpcapable_opt_rsp mptcp_opt;
91		mptcp_key_t mp_localkey = 0;
92
93		mp_localkey = mptcp_get_localkey(mp_tp);
94		if (mp_localkey == 0) {
95			/* an embryonic connection was closed from above */
96			return (optlen);
97		}
98		bzero(&mptcp_opt,
99		    sizeof (struct mptcp_mpcapable_opt_rsp));
100		mptcp_opt.mmc_common.mmco_kind = TCPOPT_MULTIPATH;
101		mptcp_opt.mmc_common.mmco_len =
102		    sizeof (struct mptcp_mpcapable_opt_rsp);
103		mptcp_opt.mmc_common.mmco_subtype = MPO_CAPABLE;
104		MPT_LOCK_SPIN(mp_tp);
105		mptcp_opt.mmc_common.mmco_version = mp_tp->mpt_version;
106		mptcp_opt.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT;
107		if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
108			mptcp_opt.mmc_common.mmco_flags |=
109			    MPCAP_CHECKSUM_CBIT;
110		MPT_UNLOCK(mp_tp);
111		mptcp_opt.mmc_localkey = mp_localkey;
112		memcpy(opt + optlen, &mptcp_opt,
113		    mptcp_opt.mmc_common.mmco_len);
114		optlen += mptcp_opt.mmc_common.mmco_len;
115		if (mptcp_dbg >= MP_VERBOSE_DEBUG_2) {
116			printf("%s: SYN_ACK localkey = %llx \n",
117			    __func__, mp_localkey);
118		}
119	} else {
120		/* Only the SYN flag is set */
121		struct mptcp_mpcapable_opt_common mptcp_opt;
122		mptcp_key_t mp_localkey = 0;
123		mp_localkey = mptcp_get_localkey(mp_tp);
124		so->so_flags |= SOF_MPTCP_CLIENT;
125		if (mp_localkey == 0) {
126			/* an embryonic connection was closed */
127			return (optlen);
128		}
129		bzero(&mptcp_opt,
130		    sizeof (struct mptcp_mpcapable_opt_common));
131		mptcp_opt.mmco_kind = TCPOPT_MULTIPATH;
132		mptcp_opt.mmco_len =
133		    sizeof (struct mptcp_mpcapable_opt_common) +
134		    sizeof (mptcp_key_t);
135		mptcp_opt.mmco_subtype = MPO_CAPABLE;
136		MPT_LOCK_SPIN(mp_tp);
137		mptcp_opt.mmco_version = mp_tp->mpt_version;
138		mptcp_opt.mmco_flags |= MPCAP_PROPOSAL_SBIT;
139		if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
140			mptcp_opt.mmco_flags |= MPCAP_CHECKSUM_CBIT;
141		MPT_UNLOCK(mp_tp);
142		(void) memcpy(opt + optlen, &mptcp_opt,
143		    sizeof (struct mptcp_mpcapable_opt_common));
144		optlen += sizeof (struct mptcp_mpcapable_opt_common);
145		(void) memcpy(opt + optlen, &mp_localkey,
146		    sizeof (mptcp_key_t));
147		optlen += sizeof (mptcp_key_t);
148	}
149
150	return (optlen);
151}
152
153static unsigned
154mptcp_setup_join_subflow_syn_opts(struct socket *so, int flags, u_char *opt,
155    unsigned optlen)
156{
157	struct inpcb *inp = sotoinpcb(so);
158	struct tcpcb *tp = NULL;
159
160	if (!inp)
161		return (optlen);
162
163	tp = intotcpcb(inp);
164	if (!tp)
165		return (optlen);
166
167	if (!tp->t_mptcb)
168		return (optlen);
169
170	if ((flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
171		struct mptcp_mpjoin_opt_rsp mpjoin_rsp;
172		struct mptcb *mp_tp = tptomptp(tp);
173
174		if (mp_tp == NULL)
175			return (optlen);
176
177		MPT_LOCK(mp_tp);
178		if (mptcp_get_localkey(mp_tp) == 0) {
179			MPT_UNLOCK(mp_tp);
180			return (optlen);
181		}
182		MPT_UNLOCK(mp_tp);
183		bzero(&mpjoin_rsp, sizeof (mpjoin_rsp));
184		mpjoin_rsp.mmjo_kind = TCPOPT_MULTIPATH;
185		mpjoin_rsp.mmjo_len = sizeof (mpjoin_rsp);
186		mpjoin_rsp.mmjo_subtype_bkp = MPO_JOIN << 4;
187		if (tp->t_mpflags & TMPF_BACKUP_PATH)
188			mpjoin_rsp.mmjo_subtype_bkp |= MPTCP_BACKUP;
189		mpjoin_rsp.mmjo_addr_id = tp->t_local_aid;
190		mptcp_get_rands(tp->t_local_aid, tptomptp(tp),
191		    &mpjoin_rsp.mmjo_rand, NULL);
192		mpjoin_rsp.mmjo_mac = mptcp_get_trunced_hmac(tp->t_local_aid,
193		    mp_tp);
194		memcpy(opt + optlen, &mpjoin_rsp, mpjoin_rsp.mmjo_len);
195		optlen += mpjoin_rsp.mmjo_len;
196	} else {
197		struct mptcp_mpjoin_opt_req mpjoin_req;
198		bzero(&mpjoin_req, sizeof (mpjoin_req));
199		mpjoin_req.mmjo_kind = TCPOPT_MULTIPATH;
200		mpjoin_req.mmjo_len = sizeof (mpjoin_req);
201		mpjoin_req.mmjo_subtype_bkp = MPO_JOIN << 4;
202		/* A secondary subflow is started off as backup */
203		mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP;
204		tp->t_mpflags |= TMPF_BACKUP_PATH;
205		mpjoin_req.mmjo_addr_id = tp->t_local_aid;
206		mpjoin_req.mmjo_peer_token = mptcp_get_remotetoken(tp->t_mptcb);
207		if (mpjoin_req.mmjo_peer_token == 0) {
208			if (mptcp_dbg >= MP_ERR_DEBUG)
209				printf("%s: zero peer token \n", __func__);
210		}
211		mptcp_get_rands(tp->t_local_aid, tptomptp(tp),
212		    &mpjoin_req.mmjo_rand, NULL);
213		memcpy(opt + optlen, &mpjoin_req, mpjoin_req.mmjo_len);
214		optlen += mpjoin_req.mmjo_len;
215		/* send an event up, if Fast Join is requested */
216		if (mptcp_zerortt_fastjoin &&
217		    (so->so_flags & SOF_MPTCP_FASTJOIN)) {
218			soevent(so,
219			    (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFASTJ));
220			if (mptcp_dbg >= MP_ERR_DEBUG)
221				printf("%s: fast join request\n", __func__);
222		}
223	}
224	return (optlen);
225}
226
227unsigned
228mptcp_setup_join_ack_opts(struct tcpcb *tp, u_char *opt, unsigned optlen)
229{
230	unsigned new_optlen;
231	struct mptcp_mpjoin_opt_rsp2 join_rsp2;
232
233	if ((MAX_TCPOPTLEN - optlen) < sizeof (struct mptcp_mpjoin_opt_rsp2)) {
234		printf("%s: no space left %d \n", __func__, optlen);
235		return (optlen);
236	}
237
238	bzero(&join_rsp2, sizeof (struct mptcp_mpjoin_opt_rsp2));
239	join_rsp2.mmjo_kind = TCPOPT_MULTIPATH;
240	join_rsp2.mmjo_len = sizeof (struct mptcp_mpjoin_opt_rsp2);
241	join_rsp2.mmjo_subtype = MPO_JOIN;
242	mptcp_get_hmac(tp->t_local_aid, tptomptp(tp),
243	    (u_char*)&join_rsp2.mmjo_mac,
244	    sizeof (join_rsp2.mmjo_mac));
245	memcpy(opt + optlen, &join_rsp2, join_rsp2.mmjo_len);
246	new_optlen = optlen + join_rsp2.mmjo_len;
247	tp->t_mpflags |= TMPF_FASTJOINBY2_SEND;
248	return (new_optlen);
249}
250
251unsigned
252mptcp_setup_syn_opts(struct socket *so, int flags, u_char *opt, unsigned optlen)
253{
254	unsigned new_optlen;
255
256	if (mptcp_enable == 0) {
257		/* do nothing */
258		return (optlen);
259	}
260
261	if (!(so->so_flags & SOF_MP_SEC_SUBFLOW)) {
262		new_optlen = mptcp_setup_first_subflow_syn_opts(so, flags, opt,
263		    optlen);
264	} else {
265		/*
266		 * To simulate SYN_ACK with no join opt, comment this line on
267		 * OS X server side. This serves as a testing hook.
268		 */
269		new_optlen = mptcp_setup_join_subflow_syn_opts(so, flags, opt,
270		    optlen);
271	}
272	return (new_optlen);
273}
274
275static int
276mptcp_send_mpfail(struct tcpcb *tp, u_char *opt, unsigned int optlen)
277{
278#pragma unused(tp, opt, optlen)
279
280	struct mptcb *mp_tp = NULL;
281	struct mptcp_mpfail_opt fail_opt;
282	uint64_t dsn;
283	int len = sizeof (struct mptcp_mpfail_opt);
284
285	mp_tp = tptomptp(tp);
286	if (mp_tp == NULL) {
287		tp->t_mpflags &= ~TMPF_SND_MPFAIL;
288		return (optlen);
289	}
290
291	/* if option space low give up */
292	if ((MAX_TCPOPTLEN - optlen) < sizeof (struct mptcp_mpfail_opt)) {
293		tp->t_mpflags &= ~TMPF_SND_MPFAIL;
294		return (optlen);
295	}
296
297	MPT_LOCK(mp_tp);
298	dsn = mp_tp->mpt_rcvnxt;
299	MPT_UNLOCK(mp_tp);
300
301	bzero(&fail_opt, sizeof (fail_opt));
302	fail_opt.mfail_kind = TCPOPT_MULTIPATH;
303	fail_opt.mfail_len = len;
304	fail_opt.mfail_subtype = MPO_FAIL;
305	fail_opt.mfail_dsn = mptcp_hton64(dsn);
306	memcpy(opt + optlen, &fail_opt, len);
307	optlen += len;
308	tp->t_mpflags &= ~TMPF_SND_MPFAIL;
309	if (mptcp_dbg >= MP_ERR_DEBUG)
310		printf("%s: %d \n", __func__, tp->t_local_aid);
311	return (optlen);
312}
313
314static int
315mptcp_send_infinite_mapping(struct tcpcb *tp, u_char *opt, unsigned int optlen)
316{
317	struct mptcp_dsn_opt infin_opt;
318	struct mptcb *mp_tp = NULL;
319	size_t len = sizeof (struct mptcp_dsn_opt);
320	struct socket *so = tp->t_inpcb->inp_socket;
321	int error = 0;
322	int csum_len = 0;
323
324	if (!so)
325		return (optlen);
326
327	mp_tp = tptomptp(tp);
328	if (mp_tp == NULL)
329		return (optlen);
330
331	MPT_LOCK(mp_tp);
332	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
333		csum_len = 2;
334
335	/* try later */
336	if ((MAX_TCPOPTLEN - optlen) < (len + csum_len)) {
337		MPT_UNLOCK(mp_tp);
338		return (optlen);
339	}
340	bzero(&infin_opt, sizeof (infin_opt));
341	infin_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
342	infin_opt.mdss_copt.mdss_len = len + csum_len;
343	infin_opt.mdss_copt.mdss_subtype = MPO_DSS;
344	infin_opt.mdss_copt.mdss_flags |= MDSS_M;
345	if (mp_tp->mpt_flags & MPTCPF_RECVD_MPFAIL) {
346		infin_opt.mdss_dsn = (u_int32_t)
347		    MPTCP_DATASEQ_LOW32(mp_tp->mpt_dsn_at_csum_fail);
348		infin_opt.mdss_subflow_seqn = mp_tp->mpt_ssn_at_csum_fail;
349	} else {
350		infin_opt.mdss_dsn = (u_int32_t)
351		    MPTCP_DATASEQ_LOW32(mp_tp->mpt_snduna);
352		infin_opt.mdss_subflow_seqn = tp->snd_una - tp->iss;
353	}
354	MPT_UNLOCK(mp_tp);
355	if (error != 0)
356		return (optlen);
357	if ((infin_opt.mdss_dsn == 0) || (infin_opt.mdss_subflow_seqn == 0)) {
358		return (optlen);
359	}
360	infin_opt.mdss_dsn = htonl(infin_opt.mdss_dsn);
361	infin_opt.mdss_subflow_seqn = htonl(infin_opt.mdss_subflow_seqn);
362	infin_opt.mdss_data_len = 0;
363
364	memcpy(opt + optlen, &infin_opt, len);
365	optlen += len;
366	if (csum_len != 0) {
367		/* The checksum field is set to 0 for infinite mapping */
368		uint16_t csum = 0;
369		memcpy(opt + optlen, &csum, csum_len);
370		optlen += csum_len;
371	}
372
373	if (mptcp_dbg == MP_VERBOSE_DEBUG_1) {
374		printf("%s: dsn = %x, seq = %x len = %x\n", __func__,
375		    ntohl(infin_opt.mdss_dsn),
376		    ntohl(infin_opt.mdss_subflow_seqn),
377		    ntohs(infin_opt.mdss_data_len));
378	}
379
380	/* so->so_flags &= ~SOF_MPTCP_CLIENT; */
381	tp->t_mpflags |= TMPF_INFIN_SENT;
382	tcpstat.tcps_estab_fallback++;
383	return (optlen);
384}
385
386
387static int
388mptcp_ok_to_fin(struct tcpcb *tp, u_int64_t dsn, u_int32_t datalen)
389{
390	struct mptcb *mp_tp = NULL;
391	mp_tp = tptomptp(tp);
392
393	MPT_LOCK(mp_tp);
394	dsn = (mp_tp->mpt_sndmax & MPTCP_DATASEQ_LOW32_MASK) | dsn;
395	if ((dsn + datalen) == mp_tp->mpt_sndmax) {
396		MPT_UNLOCK(mp_tp);
397		return (1);
398	}
399	MPT_UNLOCK(mp_tp);
400	return (0);
401}
402
403
404/* Must be called from tcp_output to fill in the fast close option */
405static int
406mptcp_send_fastclose(struct tcpcb *tp, u_char *opt, unsigned int optlen,
407	int flags)
408{
409	struct mptcp_fastclose_opt fastclose_opt;
410	struct mptcb *mp_tp = tptomptp(tp);
411
412	/* Only ACK flag should be set */
413	if (flags != TH_ACK)
414		return (optlen);
415
416	if ((MAX_TCPOPTLEN - optlen) <
417		sizeof (struct mptcp_fastclose_opt)) {
418		return (optlen);
419	}
420
421	bzero(&fastclose_opt, sizeof (struct mptcp_fastclose_opt));
422	fastclose_opt.mfast_kind = TCPOPT_MULTIPATH;
423	fastclose_opt.mfast_len = sizeof (struct mptcp_fastclose_opt);
424	fastclose_opt.mfast_subtype = MPO_FASTCLOSE;
425	MPT_LOCK_SPIN(mp_tp);
426	fastclose_opt.mfast_key =  mptcp_get_remotekey(mp_tp);
427	MPT_UNLOCK(mp_tp);
428	memcpy(opt + optlen, &fastclose_opt, fastclose_opt.mfast_len);
429	optlen += fastclose_opt.mfast_len;
430
431	return (optlen);
432}
433
434unsigned int
435mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt,
436    unsigned int optlen, int flags, int datalen,
437    unsigned int **dss_lenp, u_int8_t **finp, u_int64_t *dss_valp,
438    u_int32_t **sseqp, boolean_t *p_mptcp_acknow)
439{
440	struct inpcb *inp = (struct inpcb *)tp->t_inpcb;
441	struct socket *so = inp->inp_socket;
442	struct mptcb *mp_tp = tptomptp(tp);
443	boolean_t do_csum = FALSE;
444	boolean_t send_64bit_dsn = FALSE;
445	boolean_t send_64bit_ack = FALSE;
446	u_int32_t old_mpt_flags = tp->t_mpflags &
447	       	(TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL);
448
449	if ((mptcp_enable == 0) ||
450	    (mp_tp == NULL) ||
451	    (mp_tp->mpt_flags & MPTCPF_PEEL_OFF) ||
452	    (tp->t_state == TCPS_CLOSED)) {
453		/* do nothing */
454		goto ret_optlen;
455	}
456
457	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
458		do_csum = TRUE;
459	}
460
461	/* tcp_output handles the SYN path separately */
462	if (flags & TH_SYN) {
463		goto ret_optlen;
464	}
465
466	if ((MAX_TCPOPTLEN - optlen) <
467	    sizeof (struct mptcp_mpcapable_opt_common)) {
468		if (mptcp_dbg >= MP_ERR_DEBUG) {
469			printf("MPTCP ERROR %s: no space left %d flags %x "
470			    "tp->t_mpflags %x"
471			    "len %d\n", __func__, optlen, flags, tp->t_mpflags,
472			    datalen);
473		}
474		goto ret_optlen;
475	}
476
477	if (tp->t_mpflags & TMPF_FASTCLOSE) {
478		optlen = mptcp_send_fastclose(tp, opt, optlen, flags);
479		VERIFY(datalen == 0);
480		goto ret_optlen;
481	}
482
483	if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
484		if (tp->t_mpflags & TMPF_SND_MPFAIL)
485			optlen = mptcp_send_mpfail(tp, opt, optlen);
486		else if (!(tp->t_mpflags & TMPF_INFIN_SENT))
487			optlen = mptcp_send_infinite_mapping(tp, opt, optlen);
488		goto ret_optlen;
489	}
490
491	if (tp->t_mpflags & TMPF_SND_MPPRIO) {
492		optlen = mptcp_snd_mpprio(tp, opt, optlen);
493		goto ret_optlen;
494	}
495
496	if (((tp->t_mpflags & TMPF_FASTJOINBY2_SEND) ||
497	    (tp->t_mpflags & TMPF_FASTJOIN_SEND )) &&
498	    (datalen > 0)) {
499		tp->t_mpflags &= ~TMPF_FASTJOINBY2_SEND;
500		tp->t_mpflags &= ~TMPF_FASTJOIN_SEND;
501		goto fastjoin_send;
502	}
503
504	if ((tp->t_mpflags & TMPF_PREESTABLISHED) &&
505	    (!(tp->t_mpflags & TMPF_SENT_KEYS)) &&
506	    (!(tp->t_mpflags & TMPF_JOINED_FLOW))) {
507		struct mptcp_mpcapable_opt_rsp1 mptcp_opt;
508		if ((MAX_TCPOPTLEN - optlen) <
509		    sizeof (struct mptcp_mpcapable_opt_rsp1))
510			goto ret_optlen;
511		bzero(&mptcp_opt, sizeof (struct mptcp_mpcapable_opt_rsp1));
512		mptcp_opt.mmc_common.mmco_kind = TCPOPT_MULTIPATH;
513		mptcp_opt.mmc_common.mmco_len =
514		    sizeof (struct mptcp_mpcapable_opt_rsp1);
515		mptcp_opt.mmc_common.mmco_subtype = MPO_CAPABLE;
516		mptcp_opt.mmc_common.mmco_version = MP_DRAFT_VERSION_12;
517		/* HMAC-SHA1 is the proposal */
518		mptcp_opt.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT;
519		MPT_LOCK(mp_tp);
520		if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
521			mptcp_opt.mmc_common.mmco_flags |= MPCAP_CHECKSUM_CBIT;
522		mptcp_opt.mmc_localkey = mptcp_get_localkey(mp_tp);
523		mptcp_opt.mmc_remotekey = mptcp_get_remotekey(mp_tp);
524		MPT_UNLOCK(mp_tp);
525		memcpy(opt + optlen, &mptcp_opt, mptcp_opt.mmc_common.mmco_len);
526		optlen += mptcp_opt.mmc_common.mmco_len;
527		tp->t_mpflags |= TMPF_SENT_KEYS;
528		so->so_flags |= SOF_MPTCP_TRUE;
529		tp->t_mpflags &= ~TMPF_PREESTABLISHED;
530		tp->t_mpflags |= TMPF_MPTCP_TRUE;
531
532		if (!tp->t_mpuna) {
533			tp->t_mpuna = tp->snd_una;
534		} else {
535			/* its a retransmission of the MP_CAPABLE ACK */
536		}
537		if (mptcp_dbg >= MP_ERR_DEBUG) {
538			printf("MPTCP SUCCESS %s: established.\n", __func__);
539		}
540		goto ret_optlen;
541	} else if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
542		if (tp->t_mpflags & TMPF_SND_REM_ADDR) {
543			int rem_opt_len = sizeof (struct mptcp_remaddr_opt);
544			if ((optlen + rem_opt_len) <= MAX_TCPOPTLEN) {
545				mptcp_send_remaddr_opt(tp,
546				    (struct mptcp_remaddr_opt *)(opt + optlen));
547				optlen += rem_opt_len;
548				goto ret_optlen;
549			} else {
550				tp->t_mpflags &= ~TMPF_SND_REM_ADDR;
551			}
552		}
553	}
554
555	if ((tp->t_mpflags & TMPF_JOINED_FLOW) &&
556	    (tp->t_mpflags & TMPF_PREESTABLISHED) &&
557	    (!(tp->t_mpflags & TMPF_RECVD_JOIN)) &&
558	    (tp->t_mpflags & TMPF_SENT_JOIN) &&
559	    (!(tp->t_mpflags & TMPF_MPTCP_TRUE))) {
560	    	MPT_LOCK(mp_tp);
561	    	if (mptcp_get_localkey(mp_tp) == 0) {
562			MPT_UNLOCK(mp_tp);
563			goto ret_optlen;
564		}
565		MPT_UNLOCK(mp_tp);
566		/* Do the ACK part */
567		optlen = mptcp_setup_join_ack_opts(tp, opt, optlen);
568		if (!tp->t_mpuna) {
569			tp->t_mpuna = tp->snd_una;
570		}
571		/* Start a timer to retransmit the ACK */
572		tp->t_timer[TCPT_JACK_RXMT] =
573			    OFFSET_FROM_START(tp, tcp_jack_rxmt);
574		goto ret_optlen;
575	}
576
577	if (!(tp->t_mpflags & TMPF_MPTCP_TRUE))
578		goto ret_optlen;
579fastjoin_send:
580	/*
581	 * From here on, all options are sent only if MPTCP_TRUE
582	 * or when data is sent early on as in Fast Join
583	 */
584
585	MPT_LOCK(mp_tp);
586	if ((mp_tp->mpt_flags & MPTCPF_SND_64BITDSN) || force_64bit_dsn) {
587		send_64bit_dsn = TRUE;
588	}
589	if (mp_tp->mpt_flags & MPTCPF_SND_64BITACK) {
590		send_64bit_ack = TRUE;
591	}
592	MPT_UNLOCK(mp_tp);
593
594#define	CHECK_OPTLEN	{						\
595	if ((MAX_TCPOPTLEN - optlen) < len) {				\
596		if (mptcp_dbg >= MP_ERR_DEBUG) {			\
597			printf("MPTCP ERROR %s: len %d optlen %d \n",	\
598			    __func__,					\
599			    len, optlen);				\
600		}							\
601		goto ret_optlen;					\
602	}								\
603}
604
605#define	DO_FIN(dsn_opt) {						\
606	int sndfin = 0;							\
607	sndfin = mptcp_ok_to_fin(tp, dsn_opt.mdss_dsn, datalen);	\
608	if (sndfin) {							\
609		dsn_opt.mdss_copt.mdss_flags |= MDSS_F;			\
610		*finp = opt + optlen + offsetof(struct mptcp_dss_copt,	\
611		    mdss_flags);					\
612		dsn_opt.mdss_data_len += 1;    				\
613	}								\
614}
615
616#define	CHECK_DATALEN {							\
617	/* MPTCP socket does not support IP options */			\
618	if ((datalen + optlen + len) > tp->t_maxopd) {			\
619		if (mptcp_dbg >= MP_VERBOSE_DEBUG_2)			\
620			printf("%s: nosp %d len %d opt %d %d %d\n",	\
621			    __func__, datalen, len, optlen,		\
622			    tp->t_maxseg, tp->t_maxopd);		\
623		/* remove option length from payload len */		\
624		datalen = tp->t_maxopd - optlen - len;			\
625	}								\
626}
627
628	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
629	    (send_64bit_dsn)) {
630		/*
631		 * If there was the need to send 64-bit Data ACK along
632		 * with 64-bit DSN, then 26 or 28 bytes would be used.
633		 * With timestamps and NOOP padding that will cause
634		 * overflow. Hence, in the rare event that both 64-bit
635		 * DSN and 64-bit ACK have to be sent, delay the send of
636		 * 64-bit ACK until our 64-bit DSN is acked with a 64-bit ack.
637		 * XXX If this delay causes issue, remove the 2-byte padding.
638		 */
639		struct mptcp_dss64_ack32_opt dsn_ack_opt;
640		unsigned int len = sizeof (dsn_ack_opt);
641
642		if (do_csum) {
643			len += 2;
644		}
645
646		CHECK_OPTLEN;
647
648		bzero(&dsn_ack_opt, sizeof (dsn_ack_opt));
649		dsn_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
650		dsn_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
651		dsn_ack_opt.mdss_copt.mdss_len = len;
652		dsn_ack_opt.mdss_copt.mdss_flags |=
653		    MDSS_M | MDSS_m | MDSS_A;
654
655		CHECK_DATALEN;
656
657		mptcp_output_getm_dsnmap64(so, off, (u_int32_t)datalen,
658		    &dsn_ack_opt.mdss_dsn,
659		    &dsn_ack_opt.mdss_subflow_seqn,
660		    &dsn_ack_opt.mdss_data_len);
661
662		*dss_valp = dsn_ack_opt.mdss_dsn;
663
664		if ((dsn_ack_opt.mdss_data_len == 0) ||
665		    (dsn_ack_opt.mdss_dsn == 0)) {
666			goto ret_optlen;
667		}
668
669		if (tp->t_mpflags & TMPF_SEND_DFIN) {
670			DO_FIN(dsn_ack_opt);
671		}
672
673		MPT_LOCK(mp_tp);
674		dsn_ack_opt.mdss_ack =
675		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
676		MPT_UNLOCK(mp_tp);
677
678		dsn_ack_opt.mdss_dsn = mptcp_hton64(dsn_ack_opt.mdss_dsn);
679		dsn_ack_opt.mdss_subflow_seqn = htonl(
680		    dsn_ack_opt.mdss_subflow_seqn);
681		dsn_ack_opt.mdss_data_len = htons(
682		    dsn_ack_opt.mdss_data_len);
683		*dss_lenp = (unsigned int *)(void *)(opt + optlen +
684		    offsetof(struct mptcp_dss64_ack32_opt, mdss_data_len));
685
686		memcpy(opt + optlen, &dsn_ack_opt, sizeof (dsn_ack_opt));
687
688		if (do_csum) {
689			*sseqp = (u_int32_t *)(void *)(opt + optlen +
690			    offsetof(struct mptcp_dss64_ack32_opt,
691			    mdss_subflow_seqn));
692		}
693		optlen += len;
694		if (mptcp_dbg == MP_VERBOSE_DEBUG_2) {
695			printf("%s: long DSS = %llx ACK = %llx \n",
696			    __func__,
697			    mptcp_ntoh64(dsn_ack_opt.mdss_dsn),
698			    mptcp_ntoh64(dsn_ack_opt.mdss_ack));
699		}
700		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
701		goto ret_optlen;
702	}
703
704	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
705	    (!send_64bit_dsn) &&
706	    !(tp->t_mpflags & TMPF_MPTCP_ACKNOW))  {
707		struct mptcp_dsn_opt dsn_opt;
708		unsigned int len = sizeof (struct mptcp_dsn_opt);
709
710		if (do_csum) {
711			len += 2;
712		}
713
714		CHECK_OPTLEN;
715
716		bzero(&dsn_opt, sizeof (dsn_opt));
717		dsn_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
718		dsn_opt.mdss_copt.mdss_subtype = MPO_DSS;
719		dsn_opt.mdss_copt.mdss_len = len;
720		dsn_opt.mdss_copt.mdss_flags |= MDSS_M;
721
722		CHECK_DATALEN;
723
724		mptcp_output_getm_dsnmap32(so, off, (u_int32_t)datalen,
725		    &dsn_opt.mdss_dsn,
726		    &dsn_opt.mdss_subflow_seqn, &dsn_opt.mdss_data_len,
727		    dss_valp);
728
729		if ((dsn_opt.mdss_data_len == 0) ||
730		    (dsn_opt.mdss_dsn == 0)) {
731			goto ret_optlen;
732		}
733
734		if (tp->t_mpflags & TMPF_SEND_DFIN) {
735			DO_FIN(dsn_opt);
736		}
737
738		dsn_opt.mdss_dsn = htonl(dsn_opt.mdss_dsn);
739		dsn_opt.mdss_subflow_seqn = htonl(dsn_opt.mdss_subflow_seqn);
740		dsn_opt.mdss_data_len = htons(dsn_opt.mdss_data_len);
741		*dss_lenp = (unsigned int *)(void *)(opt + optlen +
742		    offsetof(struct mptcp_dsn_opt, mdss_data_len));
743		memcpy(opt + optlen, &dsn_opt, sizeof (dsn_opt));
744		if (do_csum) {
745			*sseqp = (u_int32_t *)(void *)(opt + optlen +
746			    offsetof(struct mptcp_dsn_opt, mdss_subflow_seqn));
747		}
748		optlen += len;
749		if (mptcp_dbg == MP_VERBOSE_DEBUG_2) {
750			printf("%s: DSS option. dsn = %x, seq = %x len = %x\n",
751			    __func__,
752			    ntohl(dsn_opt.mdss_dsn),
753			    ntohl(dsn_opt.mdss_subflow_seqn),
754			    ntohs(dsn_opt.mdss_data_len));
755		}
756		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
757		goto ret_optlen;
758	}
759
760	/* 32-bit Data ACK option */
761	if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) &&
762	    (!send_64bit_ack) &&
763	    !(tp->t_mpflags & TMPF_SEND_DSN) &&
764	    !(tp->t_mpflags & TMPF_SEND_DFIN)) {
765
766		struct mptcp_data_ack_opt dack_opt;
767		unsigned int len = 0;
768do_ack32_only:
769		len = sizeof (dack_opt);
770
771		CHECK_OPTLEN;
772
773		bzero(&dack_opt, len);
774		dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
775		dack_opt.mdss_copt.mdss_len = len;
776		dack_opt.mdss_copt.mdss_subtype = MPO_DSS;
777		dack_opt.mdss_copt.mdss_flags |= MDSS_A;
778		MPT_LOCK_SPIN(mp_tp);
779		dack_opt.mdss_ack =
780		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
781		MPT_UNLOCK(mp_tp);
782		memcpy(opt + optlen, &dack_opt, len);
783		optlen += len;
784		VERIFY(optlen <= MAX_TCPOPTLEN);
785		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
786		goto ret_optlen;
787	}
788
789	/* 64-bit Data ACK option */
790	if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) &&
791	    (send_64bit_ack) &&
792	    !(tp->t_mpflags & TMPF_SEND_DSN) &&
793	    !(tp->t_mpflags & TMPF_SEND_DFIN)) {
794		struct mptcp_data_ack64_opt dack_opt;
795		unsigned int len = 0;
796do_ack64_only:
797		len = sizeof (dack_opt);
798
799		CHECK_OPTLEN;
800
801		bzero(&dack_opt, len);
802		dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
803		dack_opt.mdss_copt.mdss_len = len;
804		dack_opt.mdss_copt.mdss_subtype = MPO_DSS;
805		dack_opt.mdss_copt.mdss_flags |= (MDSS_A | MDSS_a);
806		MPT_LOCK_SPIN(mp_tp);
807		dack_opt.mdss_ack = mptcp_hton64(mp_tp->mpt_rcvnxt);
808		/*
809		 * The other end should retransmit 64-bit DSN until it
810		 * receives a 64-bit ACK.
811		 */
812		mp_tp->mpt_flags &= ~MPTCPF_SND_64BITACK;
813		MPT_UNLOCK(mp_tp);
814		memcpy(opt + optlen, &dack_opt, len);
815		optlen += len;
816		VERIFY(optlen <= MAX_TCPOPTLEN);
817		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
818		goto ret_optlen;
819	}
820
821	/* 32-bit DSS+Data ACK option */
822	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
823	    (!send_64bit_dsn) &&
824	    (!send_64bit_ack) &&
825	    (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
826		struct mptcp_dss_ack_opt dss_ack_opt;
827		unsigned int len = sizeof (dss_ack_opt);
828
829		if (do_csum)
830			len += 2;
831
832		CHECK_OPTLEN;
833
834		bzero(&dss_ack_opt, sizeof (dss_ack_opt));
835		dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
836		dss_ack_opt.mdss_copt.mdss_len = len;
837		dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
838		dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M;
839		MPT_LOCK_SPIN(mp_tp);
840		dss_ack_opt.mdss_ack =
841		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
842		MPT_UNLOCK(mp_tp);
843
844		CHECK_DATALEN;
845
846		mptcp_output_getm_dsnmap32(so, off, (u_int32_t)datalen,
847		    &dss_ack_opt.mdss_dsn,
848		    &dss_ack_opt.mdss_subflow_seqn,
849		    &dss_ack_opt.mdss_data_len,
850		    dss_valp);
851
852		if ((dss_ack_opt.mdss_data_len == 0) ||
853		    (dss_ack_opt.mdss_dsn == 0)) {
854			goto do_ack32_only;
855		}
856
857		if (tp->t_mpflags & TMPF_SEND_DFIN) {
858			DO_FIN(dss_ack_opt);
859		}
860
861		dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn);
862		dss_ack_opt.mdss_subflow_seqn =
863		    htonl(dss_ack_opt.mdss_subflow_seqn);
864		dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
865		*dss_lenp = (unsigned int *)(void *)(opt + optlen +
866		    offsetof(struct mptcp_dss_ack_opt, mdss_data_len));
867		memcpy(opt + optlen, &dss_ack_opt, sizeof (dss_ack_opt));
868		if (do_csum) {
869			*sseqp = (u_int32_t *)(void *)(opt + optlen +
870			    offsetof(struct mptcp_dss_ack_opt,
871			    mdss_subflow_seqn));
872		}
873
874		optlen += len;
875
876		if (optlen > MAX_TCPOPTLEN)
877			panic("optlen too large");
878		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
879		goto ret_optlen;
880	}
881
882	/* 32-bit DSS + 64-bit DACK option */
883	if ((tp->t_mpflags & TMPF_SEND_DSN) &&
884	    (!send_64bit_dsn) &&
885	    (send_64bit_ack) &&
886	    (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) {
887		struct mptcp_dss32_ack64_opt dss_ack_opt;
888		unsigned int len = sizeof (dss_ack_opt);
889
890		if (do_csum)
891			len += 2;
892
893		CHECK_OPTLEN;
894
895		bzero(&dss_ack_opt, sizeof (dss_ack_opt));
896		dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
897		dss_ack_opt.mdss_copt.mdss_len = len;
898		dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
899		dss_ack_opt.mdss_copt.mdss_flags |= MDSS_M | MDSS_A | MDSS_a;
900		MPT_LOCK_SPIN(mp_tp);
901		dss_ack_opt.mdss_ack =
902		    mptcp_hton64(mp_tp->mpt_rcvnxt);
903		MPT_UNLOCK(mp_tp);
904
905		CHECK_DATALEN;
906
907		mptcp_output_getm_dsnmap32(so, off, (u_int32_t)datalen,
908		    &dss_ack_opt.mdss_dsn, &dss_ack_opt.mdss_subflow_seqn,
909		    &dss_ack_opt.mdss_data_len, dss_valp);
910
911		if ((dss_ack_opt.mdss_data_len == 0) ||
912		    (dss_ack_opt.mdss_dsn == 0)) {
913			goto do_ack64_only;
914		}
915
916		if (tp->t_mpflags & TMPF_SEND_DFIN) {
917			DO_FIN(dss_ack_opt);
918		}
919
920		dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn);
921		dss_ack_opt.mdss_subflow_seqn =
922		    htonl(dss_ack_opt.mdss_subflow_seqn);
923		dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
924		*dss_lenp = (unsigned int *)(void *)(opt + optlen +
925		    offsetof(struct mptcp_dss32_ack64_opt, mdss_data_len));
926		memcpy(opt + optlen, &dss_ack_opt, sizeof (dss_ack_opt));
927		if (do_csum) {
928			*sseqp = (u_int32_t *)(void *)(opt + optlen +
929			    offsetof(struct mptcp_dss32_ack64_opt,
930			    mdss_subflow_seqn));
931		}
932
933		optlen += len;
934
935		if (optlen > MAX_TCPOPTLEN)
936			panic("optlen too large");
937		tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW;
938		goto ret_optlen;
939	}
940
941	if (tp->t_mpflags & TMPF_SEND_DFIN) {
942		struct mptcp_dss_ack_opt dss_ack_opt;
943		unsigned int len = sizeof (struct mptcp_dss_ack_opt);
944
945		if (do_csum)
946			len += 2;
947
948		CHECK_OPTLEN;
949
950		bzero(&dss_ack_opt, sizeof (dss_ack_opt));
951
952		MPT_LOCK(mp_tp);
953		/*
954		 * Data FIN occupies one sequence space.
955		 * Don't send it if it has been Acked.
956		 */
957		if (((mp_tp->mpt_sndnxt + 1) != mp_tp->mpt_sndmax) ||
958		    (mp_tp->mpt_snduna == mp_tp->mpt_sndmax)) {
959			MPT_UNLOCK(mp_tp);
960			if (mptcp_dbg == MP_VERBOSE_DEBUG_2)
961				printf("%s: Fin state %d %llu %llu\n", __func__,
962				    mp_tp->mpt_state, mp_tp->mpt_sndnxt,
963				    mp_tp->mpt_sndmax);
964			goto ret_optlen;
965		}
966
967		dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH;
968		dss_ack_opt.mdss_copt.mdss_len = len;
969		dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS;
970		dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M | MDSS_F;
971		dss_ack_opt.mdss_ack =
972		    htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt));
973		dss_ack_opt.mdss_dsn =
974		    htonl(MPTCP_DATASEQ_LOW32(mp_tp->mpt_sndnxt));
975		MPT_UNLOCK(mp_tp);
976		dss_ack_opt.mdss_subflow_seqn = 0;
977		dss_ack_opt.mdss_data_len = 1;
978		dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len);
979		memcpy(opt + optlen, &dss_ack_opt, sizeof (dss_ack_opt));
980		if (do_csum) {
981			*dss_valp = mp_tp->mpt_sndnxt;
982			*sseqp = (u_int32_t *)(void *)(opt + optlen +
983			    offsetof(struct mptcp_dss_ack_opt,
984			    mdss_subflow_seqn));
985		}
986		optlen += len;
987	}
988
989ret_optlen:
990	if (TRUE == *p_mptcp_acknow ) {
991		VERIFY(old_mpt_flags != 0);
992		u_int32_t new_mpt_flags = tp->t_mpflags &
993		    (TMPF_SND_MPPRIO | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL);
994
995		/*
996		 * If none of the above mpflags were acted on by
997		 * this routine, reset these flags and set p_mptcp_acknow
998		 * to false.
999		 * XXX The reset value of p_mptcp_acknow can be used
1000		 * to communicate tcp_output to NOT send a pure ack without any
1001		 * MPTCP options as it will be treated as a dup ack.
1002		 * Since the instances of mptcp_setup_opts not acting on
1003		 * these options are mostly corner cases and sending a dup
1004		 * ack here would only have an impact if the system
1005		 * has sent consecutive dup acks before this false one,
1006		 * we haven't modified the logic in tcp_output to avoid
1007		 * that.
1008		 */
1009		if (old_mpt_flags == new_mpt_flags) {
1010			tp->t_mpflags &= ~(TMPF_SND_MPPRIO
1011			    | TMPF_SND_REM_ADDR | TMPF_SND_MPFAIL);
1012			*p_mptcp_acknow = FALSE;
1013		}
1014	}
1015
1016	return optlen;
1017}
1018
1019/*
1020 * MPTCP Options Input Processing
1021 */
1022
1023
1024static int
1025mptcp_valid_mpcapable_common_opt(u_char *cp, u_int32_t mptcp_version)
1026{
1027	struct mptcp_mpcapable_opt_common *rsp =
1028	    (struct mptcp_mpcapable_opt_common *)cp;
1029
1030	/* mmco_kind, mmco_len and mmco_subtype are validated before */
1031
1032	/* In future, there can be more than one version supported */
1033	if (rsp->mmco_version != mptcp_version)
1034		return (0);
1035
1036	if (!(rsp->mmco_flags & MPCAP_PROPOSAL_SBIT))
1037		return (0);
1038
1039	if (rsp->mmco_flags & (MPCAP_BBIT | MPCAP_CBIT | MPCAP_DBIT |
1040	    MPCAP_EBIT | MPCAP_FBIT | MPCAP_GBIT))
1041		return (0);
1042
1043	return (1);
1044}
1045
1046
1047static void
1048mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
1049    int optlen)
1050{
1051	struct mptcp_mpcapable_opt_rsp1 *rsp1 = NULL;
1052	struct mptcp_mpcapable_opt_rsp *rsp = NULL;
1053	struct mptcb *mp_tp = tptomptp(tp);
1054
1055#define	MPTCP_OPT_ERROR_PATH(tp) {					\
1056	tp->t_mpflags |= TMPF_RESET;					\
1057	tcpstat.tcps_invalid_mpcap++;					\
1058	if (tp->t_inpcb->inp_socket != NULL) {				\
1059		soevent(tp->t_inpcb->inp_socket,			\
1060		    SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);	\
1061	}								\
1062}
1063
1064	if (mp_tp == NULL) {
1065		if (mptcp_dbg == MP_ERR_DEBUG)
1066			printf("MPTCP ERROR %s: NULL mpsocket \n", __func__);
1067		tcpstat.tcps_invalid_mpcap++;
1068		return;
1069	}
1070
1071	/* Validate the kind, len, flags */
1072	if (mptcp_valid_mpcapable_common_opt(cp, mp_tp->mpt_version) != 1) {
1073		tcpstat.tcps_invalid_mpcap++;
1074		return;
1075	}
1076
1077	/* A SYN contains only the MP_CAPABLE option */
1078	if ((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) {
1079		/* XXX passive side not supported yet */
1080		return;
1081	} else if ((th->th_flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
1082
1083		/* A SYN/ACK contains peer's key and flags */
1084		if (optlen != sizeof (struct mptcp_mpcapable_opt_rsp)) {
1085			/* complain */
1086			if (mptcp_dbg == MP_ERR_DEBUG) {
1087				printf("%s: SYN_ACK optlen = %d, sizeof mp opt \
1088				    = %lu \n", __func__, optlen,
1089				    sizeof (struct mptcp_mpcapable_opt_rsp));
1090			}
1091			tcpstat.tcps_invalid_mpcap++;
1092			return;
1093		}
1094
1095		/*
1096		 * If checksum flag is set, enable MPTCP checksum, even if
1097		 * it was not negotiated on the first SYN.
1098		 */
1099		if (((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags &
1100		    MPCAP_CHECKSUM_CBIT)
1101			mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
1102
1103		rsp = (struct mptcp_mpcapable_opt_rsp *)cp;
1104		MPT_LOCK_SPIN(mp_tp);
1105		mp_tp->mpt_remotekey = rsp->mmc_localkey;
1106		MPT_UNLOCK(mp_tp);
1107		tp->t_mpflags |= TMPF_PREESTABLISHED;
1108
1109		if (mptcp_dbg > MP_VERBOSE_DEBUG_1) {
1110			printf("SYN_ACK pre established, optlen = %d, tp \
1111			    state = %d  sport = %x dport = %x key = %llx \n",
1112			    optlen, tp->t_state, th->th_sport, th->th_dport,
1113			    mp_tp->mpt_remotekey);
1114		}
1115
1116	} else if ((th->th_flags & TH_ACK) &&
1117		(tp->t_mpflags & TMPF_PREESTABLISHED)) {
1118
1119		/*
1120		 * Verify checksum flag is set, if we initially negotiated
1121		 * checksum.
1122		 */
1123		if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) &&
1124		    !(((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags &
1125		    MPCAP_CHECKSUM_CBIT)) {
1126			if (mptcp_dbg == MP_ERR_DEBUG) {
1127				printf("%s: checksum negotiation failure \n",
1128				    __func__);
1129			}
1130			MPTCP_OPT_ERROR_PATH(tp);
1131			return;
1132		}
1133
1134		if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM) &&
1135		    (((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags &
1136		    MPCAP_CHECKSUM_CBIT)) {
1137			if (mptcp_dbg == MP_ERR_DEBUG) {
1138				printf("%s: checksum negotiation failure 2.\n",
1139				    __func__);
1140			}
1141			MPTCP_OPT_ERROR_PATH(tp);
1142			return;
1143		}
1144
1145		/*
1146		 * The ACK of a three way handshake contains peer's key and
1147		 * flags.
1148		 */
1149		if (optlen != sizeof (struct mptcp_mpcapable_opt_rsp1)) {
1150			/* complain */
1151			if (mptcp_dbg == MP_ERR_DEBUG) {
1152				printf("%s: ACK optlen = %d , sizeof mp option \
1153				= %lu, state = %d \n",
1154				__func__,
1155				optlen,
1156				sizeof (struct mptcp_mpcapable_opt_rsp1),
1157				tp->t_state);
1158			}
1159			MPTCP_OPT_ERROR_PATH(tp);
1160			return;
1161		}
1162
1163		rsp1 = (struct mptcp_mpcapable_opt_rsp1 *)cp;
1164		/* Skipping MPT_LOCK for invariant key */
1165		if (rsp1->mmc_remotekey != *mp_tp->mpt_localkey) {
1166			if (mptcp_dbg == MP_ERR_DEBUG) {
1167				printf("MPTCP ERROR %s: key mismatch locally "
1168				    "stored key. rsp = %llx local = %llx \n",
1169				    __func__, rsp1->mmc_remotekey,
1170				    *mp_tp->mpt_localkey);
1171			}
1172			tp->t_mpflags &= ~TMPF_PREESTABLISHED;
1173			MPTCP_OPT_ERROR_PATH(tp);
1174			return;
1175		} else {
1176			/* We received both keys. Almost an MPTCP connection */
1177			/* Skipping MPT_LOCK for invariant key */
1178			if (mp_tp->mpt_remotekey != rsp1->mmc_localkey) {
1179				if (mptcp_dbg == MP_ERR_DEBUG) {
1180					printf("MPTCP ERROR %s: keys don't"
1181					    " match\n", __func__);
1182				}
1183				tp->t_mpflags &= ~TMPF_PREESTABLISHED;
1184				MPTCP_OPT_ERROR_PATH(tp);
1185				return;
1186			}
1187			tp->t_mpflags &= ~TMPF_PREESTABLISHED;
1188			tp->t_mpflags |= TMPF_MPTCP_RCVD_KEY;
1189			tp->t_mpflags |= TMPF_MPTCP_TRUE;
1190			tp->t_inpcb->inp_socket->so_flags |= SOF_MPTCP_TRUE;
1191			MPT_LOCK(mp_tp);
1192			DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
1193			    uint32_t, 0 /* event */);
1194			mp_tp->mpt_state = MPTCPS_ESTABLISHED;
1195			MPT_UNLOCK(mp_tp);
1196			if (mptcp_dbg >= MP_VERBOSE_DEBUG_2) {
1197				printf("MPTCP SUCCESS %s: rem key = %llx local \
1198				key = %llx \n",
1199				__func__, mp_tp->mpt_remotekey,
1200				*mp_tp->mpt_localkey);
1201			}
1202		}
1203		if (tp->t_mpuna) {
1204			tp->t_mpuna = 0;
1205		}
1206	}
1207}
1208
1209
/*
 * Handle an MP_JOIN option received on a subflow.
 *
 * tp     - TCP control block of the subflow the segment arrived on
 * cp     - start of the MP_JOIN option
 * th     - TCP header of the segment; its SYN/ACK bits select the step
 * optlen - option length as supplied by the caller
 *
 * SYN only:  rejected outright on a socket flagged SOF_MPTCP_CLIENT and on
 *            a wrong option length; otherwise ignored because accepting
 *            joins is not supported yet.
 * SYN|ACK:   length is checked, the peer's address id and random number are
 *            recorded, and the truncated HMAC is validated.  On success
 *            TMPF_SENT_JOIN is set.
 * ACK while TMPF_PREESTABLISHED:
 *            length is checked and the full-length HMAC is validated.  On
 *            success the subflow becomes TMPF_MPTCP_TRUE and an immediate
 *            ACK is requested.
 *
 * Nothing is returned.  Every failure goes through MPTCP_JOPT_ERROR_PATH.
 */
static void
mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
{
/*
 * Shared failure handling: flag the subflow for reset, bump the invalid join
 * counter and, when a socket is attached, post a must-reset event on it.
 */
#define	MPTCP_JOPT_ERROR_PATH(tp) {					\
	tp->t_mpflags |= TMPF_RESET;					\
	tcpstat.tcps_invalid_joins++;					\
	if (tp->t_inpcb->inp_socket != NULL) {				\
		soevent(tp->t_inpcb->inp_socket,			\
		    SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);	\
	}								\
}
	int error = 0;
	struct mptcb *mp_tp = tptomptp(tp);

	if ((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) {
		/* We won't accept join requests as an active opener */
		if (tp->t_inpcb->inp_socket->so_flags & SOF_MPTCP_CLIENT) {
			MPTCP_JOPT_ERROR_PATH(tp);
			return;
		}

		if (optlen != sizeof (struct mptcp_mpjoin_opt_req)) {
			if (mptcp_dbg == MP_ERR_DEBUG) {
				printf("SYN: unexpected optlen = %d, mp option"
				    "= %lu\n",
				    optlen,
				    sizeof (struct mptcp_mpjoin_opt_req));
			}
			/* send RST and close */
			MPTCP_JOPT_ERROR_PATH(tp);
			return;
		}
		/* not supported yet */
		return;
		/*
		 * Everything below is placeholder code: it is only compiled
		 * with MPTCP_NOTYET, and even then sits after the
		 * unconditional return above.
		 */
#ifdef MPTCP_NOTYET
		struct mptcp_mpjoin_opt_req *join_req =
		    (struct mptcp_mpjoin_opt_req *)cp;
		mp_so = mptcp_find_mpso(join_req->mmjo_peer_token);
		if (!mp_so) {
			if (mptcp_dbg >= MP_ERR_DEBUG)
				printf("%s: cannot find mp_so token = %x\n",
				    __func__, join_req->mmjo_peer_token);
			/* send RST */
			MPTCP_JOPT_ERROR_PATH(tp);
			return;
		}
		if (tp->t_mpflags & TMPF_PREESTABLISHED) {
			return;
		}
		mp_so->ms_remote_addr_id = join_req->mmjo_addr_id;
		mp_so->ms_remote_rand = join_req->mmjo_rand;
		tp->t_mpflags |= TMPF_PREESTABLISHED | TMPF_JOINED_FLOW;
		tp->t_mpflags |= TMPF_RECVD_JOIN;
		tp->t_inpcb->inp_socket->so_flags |= SOF_MP_SEC_SUBFLOW;
		if (join_req->mmjo_subtype & MPTCP_BACKUP) {
			tp->t_mpflags |= TMPF_BACKUP_PATH;
		}
#endif
	} else if ((th->th_flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) {
		/* SYN/ACK: the peer's answer to a join we sent */
		struct mptcp_mpjoin_opt_rsp *join_rsp =
		    (struct mptcp_mpjoin_opt_rsp *)cp;

		if (optlen != sizeof (struct mptcp_mpjoin_opt_rsp)) {
			if (mptcp_dbg >= MP_ERR_DEBUG) {
				printf("SYN_ACK: unexpected optlen = %d mp "
				    "option = %lu\n", optlen,
				    sizeof (struct mptcp_mpjoin_opt_rsp));
			}
			tp->t_mpflags &= ~TMPF_PREESTABLISHED;
			/* send RST and close */
			MPTCP_JOPT_ERROR_PATH(tp);
			return;
		}

		if (mp_tp == NULL) {
			if (mptcp_dbg >= MP_ERR_DEBUG)
				printf("%s: cannot find mp_tp in SYN_ACK\n",
				    __func__);
			tp->t_mpflags &= ~TMPF_PREESTABLISHED;
			/* send RST and close */
			MPTCP_JOPT_ERROR_PATH(tp);
			return;
		}

		/*
		 * Hand the peer's address id and random number to
		 * mptcp_set_raddr_rand() before validating the HMAC.
		 * NOTE(review): tptomptp(tp) is evaluated again here although
		 * mp_tp already holds that value.
		 */
		mptcp_set_raddr_rand(tp->t_local_aid,
		    tptomptp(tp),
		    join_rsp->mmjo_addr_id, join_rsp->mmjo_rand);
		/* Only SHA1_TRUNCATED bytes of the MAC are compared here */
		error = mptcp_validate_join_hmac(tp,
		    (u_char*)&join_rsp->mmjo_mac, SHA1_TRUNCATED);
		if (error) {
			if (mptcp_dbg >= MP_ERR_DEBUG) {
				printf("%s: SYN_ACK error = %d \n", __func__,
				    error);
			}
			tp->t_mpflags &= ~TMPF_PREESTABLISHED;
			/* send RST and close */
			MPTCP_JOPT_ERROR_PATH(tp);
			return;
		}
		tp->t_mpflags |= TMPF_SENT_JOIN;
	} else if ((th->th_flags & TH_ACK) &&
	    (tp->t_mpflags & TMPF_PREESTABLISHED)) {
		/* ACK on a pre-established subflow: final step of the join */
		struct mptcp_mpjoin_opt_rsp2 *join_rsp2 =
		    (struct mptcp_mpjoin_opt_rsp2 *)cp;

		if (optlen != sizeof (struct mptcp_mpjoin_opt_rsp2)) {
			if (mptcp_dbg >= MP_ERR_DEBUG) {
				printf("ACK: unexpected optlen = %d mp option "
				    "= %lu \n",	optlen,
				    sizeof (struct mptcp_mpjoin_opt_rsp2));
			}
			tp->t_mpflags &= ~TMPF_PREESTABLISHED;
			/* send RST and close */
			MPTCP_JOPT_ERROR_PATH(tp);
			return;
		}

		if (mp_tp == NULL) {
			tp->t_mpflags &= ~TMPF_PREESTABLISHED;
			MPTCP_JOPT_ERROR_PATH(tp);
			return;
		}

		/* The full SHA1_RESULTLEN bytes of the MAC are compared here */
		error = mptcp_validate_join_hmac(tp, join_rsp2->mmjo_mac,
		    SHA1_RESULTLEN);
		if (error) {
			if (mptcp_dbg >= MP_ERR_DEBUG) {
				printf("%s: ACK error = %d\n", __func__,
				    error);
			}
			tp->t_mpflags &= ~TMPF_PREESTABLISHED;
			MPTCP_JOPT_ERROR_PATH(tp);
			return;
		}
		/* Join accepted: promote the subflow and force an ACK out */
		tp->t_mpflags |= TMPF_MPTCP_TRUE;
		tp->t_mpflags &= ~TMPF_PREESTABLISHED;
		tp->t_flags |= TF_ACKNOW;
		tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
		tp->t_inpcb->inp_socket->so_flags |= SOF_MPTCP_TRUE;
		if (mptcp_dbg >= MP_ERR_DEBUG) {
			printf("MPTCP SUCCESS %s: join \n", __func__);
		}
	}
}
1354
1355static int
1356mptcp_validate_join_hmac(struct tcpcb *tp, u_char* hmac, int mac_len)
1357{
1358	u_char digest[SHA1_RESULTLEN] = {0};
1359	struct mptcb *mp_tp = NULL;
1360	mptcp_key_t rem_key, loc_key;
1361	u_int32_t rem_rand, loc_rand;
1362
1363	mp_tp = tp->t_mptcb;
1364	if (mp_tp == NULL)
1365		return (-1);
1366
1367	rem_rand = loc_rand = 0;
1368
1369	MPT_LOCK(mp_tp);
1370	rem_key = mp_tp->mpt_remotekey;
1371	loc_key = *mp_tp->mpt_localkey;
1372	MPT_UNLOCK(mp_tp);
1373
1374	mptcp_get_rands(tp->t_local_aid, mp_tp, &loc_rand, &rem_rand);
1375	if ((rem_rand == 0) || (loc_rand == 0))
1376		return (-1);
1377
1378	mptcp_hmac_sha1(rem_key, loc_key, rem_rand, loc_rand,
1379	    digest, sizeof (digest));
1380
1381	if (bcmp(digest, hmac, mac_len) == 0)
1382		return (0); /* matches */
1383	else {
1384		printf("%s: remote key %llx local key %llx remote rand %x "
1385		    "local rand %x \n", __func__, rem_key, loc_key,
1386		    rem_rand, loc_rand);
1387		return (-1);
1388	}
1389}
1390
1391static void
1392mptcp_do_dss_opt_ack_meat(u_int64_t full_dack, struct tcpcb *tp)
1393{
1394	struct mptcb *mp_tp = tptomptp(tp);
1395	int close_notify = 0;
1396
1397	if (mp_tp == NULL)
1398		return;
1399
1400	MPT_LOCK(mp_tp);
1401	if (MPTCP_SEQ_LEQ(full_dack, mp_tp->mpt_sndmax) &&
1402	    MPTCP_SEQ_GEQ(full_dack, mp_tp->mpt_snduna)) {
1403		mptcp_data_ack_rcvd(mp_tp, tp, full_dack);
1404		if (mp_tp->mpt_state > MPTCPS_FIN_WAIT_2)
1405			close_notify = 1;
1406		MPT_UNLOCK(mp_tp);
1407		mptcp_notify_mpready(tp->t_inpcb->inp_socket);
1408		if (close_notify)
1409			mptcp_notify_close(tp->t_inpcb->inp_socket);
1410		if (mp_tp->mpt_flags & MPTCPF_RCVD_64BITACK) {
1411			mp_tp->mpt_flags &= ~MPTCPF_RCVD_64BITACK;
1412			mp_tp->mpt_flags &= ~MPTCPF_SND_64BITDSN;
1413		}
1414	} else {
1415		MPT_UNLOCK(mp_tp);
1416		if (mptcp_dbg == MP_VERBOSE_DEBUG_2) {
1417			printf("%s: unexpected dack %llx snduna %llx "
1418			    "sndmax %llx\n", __func__, full_dack,
1419			    mp_tp->mpt_snduna, mp_tp->mpt_sndmax);
1420		}
1421	}
1422
1423	if (mptcp_dbg == MP_VERBOSE_DEBUG_2) {
1424		printf("%s: full_dack = %llu \n", __func__, full_dack);
1425	}
1426}
1427
1428static void
1429mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp)
1430{
1431	struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp;
1432	u_int64_t full_dack = 0;
1433	struct mptcb *mp_tp = tptomptp(tp);
1434	int csum_len = 0;
1435
1436#define	MPTCP_DSS_OPT_SZ_CHK(len, expected_len) {		\
1437	if (len != expected_len) {				\
1438		if (mptcp_dbg >= MP_ERR_DEBUG) {		\
1439			printf("MPTCP ERROR %s: bad len = %d"	\
1440			    "dss: %x \n", __func__,		\
1441			    len,				\
1442			    dss_rsp->mdss_flags);		\
1443		}						\
1444		return;						\
1445	}							\
1446}
1447	if (mp_tp == NULL)
1448		return;
1449
1450	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
1451		csum_len = 2;
1452
1453	dss_rsp->mdss_flags &= (MDSS_A|MDSS_a|MDSS_M|MDSS_m);
1454	switch (dss_rsp->mdss_flags) {
1455		case (MDSS_M):
1456		{
1457			/* 32-bit DSS, No Data ACK */
1458			struct mptcp_dsn_opt *dss_rsp1;
1459			dss_rsp1 = (struct mptcp_dsn_opt *)cp;
1460
1461			MPTCP_DSS_OPT_SZ_CHK(dss_rsp1->mdss_copt.mdss_len,
1462			    sizeof (struct mptcp_dsn_opt) + csum_len);
1463			if (csum_len == 0)
1464				mptcp_update_dss_rcv_state(dss_rsp1, tp, 0);
1465			else
1466				mptcp_update_dss_rcv_state(dss_rsp1, tp,
1467				    *(uint16_t *)(void *)(cp +
1468				    (dss_rsp1->mdss_copt.mdss_len - csum_len)));
1469			break;
1470		}
1471		case (MDSS_A):
1472		{
1473			/* 32-bit Data ACK, no DSS */
1474			struct mptcp_data_ack_opt *dack_opt;
1475			dack_opt = (struct mptcp_data_ack_opt *)cp;
1476
1477			MPTCP_DSS_OPT_SZ_CHK(dack_opt->mdss_copt.mdss_len,
1478			    sizeof (struct mptcp_data_ack_opt));
1479
1480			u_int32_t dack = dack_opt->mdss_ack;
1481			NTOHL(dack);
1482			MPT_LOCK_SPIN(mp_tp);
1483			MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1484			MPT_UNLOCK(mp_tp);
1485			mptcp_do_dss_opt_ack_meat(full_dack, tp);
1486			break;
1487		}
1488		case (MDSS_M | MDSS_A):
1489		{
1490			/* 32-bit Data ACK + 32-bit DSS */
1491			struct mptcp_dss_ack_opt *dss_ack_rsp;
1492			dss_ack_rsp = (struct mptcp_dss_ack_opt *)cp;
1493
1494			MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len,
1495			    sizeof (struct mptcp_dss_ack_opt) + csum_len);
1496
1497			u_int32_t dack = dss_ack_rsp->mdss_ack;
1498			NTOHL(dack);
1499			MPT_LOCK_SPIN(mp_tp);
1500			MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1501			MPT_UNLOCK(mp_tp);
1502			mptcp_do_dss_opt_ack_meat(full_dack, tp);
1503			if (csum_len == 0)
1504				mptcp_update_rcv_state_f(dss_ack_rsp, tp, 0);
1505			else
1506				mptcp_update_rcv_state_f(dss_ack_rsp, tp,
1507				    *(uint16_t *)(void *)(cp +
1508				    (dss_ack_rsp->mdss_copt.mdss_len -
1509				    csum_len)));
1510			break;
1511		}
1512		case (MDSS_M | MDSS_m):
1513		{
1514			/* 64-bit DSS , No Data ACK */
1515			struct mptcp_dsn64_opt *dsn64;
1516			dsn64 = (struct mptcp_dsn64_opt *)cp;
1517			u_int64_t full_dsn;
1518
1519			MPTCP_DSS_OPT_SZ_CHK(dsn64->mdss_copt.mdss_len,
1520			    sizeof (struct mptcp_dsn64_opt) + csum_len);
1521
1522			if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
1523				printf("%s: 64-bit M present.\n", __func__);
1524			}
1525
1526			MPT_LOCK_SPIN(mp_tp);
1527			mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1528			MPT_UNLOCK(mp_tp);
1529
1530			full_dsn = mptcp_ntoh64(dsn64->mdss_dsn);
1531			NTOHL(dsn64->mdss_subflow_seqn);
1532			NTOHS(dsn64->mdss_data_len);
1533			if (csum_len == 0)
1534				mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1535				    dsn64->mdss_subflow_seqn,
1536				    dsn64->mdss_data_len,
1537				    0);
1538			else
1539				mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1540				    dsn64->mdss_subflow_seqn,
1541				    dsn64->mdss_data_len,
1542				    *(uint16_t *)(void *)(cp +
1543				    dsn64->mdss_copt.mdss_len - csum_len));
1544			break;
1545		}
1546		case (MDSS_A | MDSS_a):
1547		{
1548			/* 64-bit Data ACK, no DSS */
1549			struct mptcp_data_ack64_opt *dack64;
1550			dack64 = (struct mptcp_data_ack64_opt *)cp;
1551
1552			MPTCP_DSS_OPT_SZ_CHK(dack64->mdss_copt.mdss_len,
1553			    sizeof (struct mptcp_data_ack64_opt));
1554
1555
1556			if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
1557				printf("%s: 64-bit A present. \n", __func__);
1558			}
1559
1560			MPT_LOCK_SPIN(mp_tp);
1561			mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1562			MPT_UNLOCK(mp_tp);
1563
1564			full_dack = mptcp_ntoh64(dack64->mdss_ack);
1565			mptcp_do_dss_opt_ack_meat(full_dack, tp);
1566			break;
1567		}
1568		case (MDSS_M | MDSS_m | MDSS_A):
1569		{
1570			/* 64-bit DSS + 32-bit Data ACK */
1571			struct mptcp_dss64_ack32_opt *dss_ack_rsp;
1572			dss_ack_rsp = (struct mptcp_dss64_ack32_opt *)cp;
1573
1574			MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len,
1575			    sizeof (struct mptcp_dss64_ack32_opt) + csum_len);
1576
1577			if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
1578				printf("%s: 64-bit M and 32-bit A present.\n",
1579					__func__);
1580			}
1581
1582			u_int32_t dack = dss_ack_rsp->mdss_ack;
1583			NTOHL(dack);
1584			MPT_LOCK_SPIN(mp_tp);
1585			mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1586			MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
1587			MPT_UNLOCK(mp_tp);
1588			mptcp_do_dss_opt_ack_meat(full_dack, tp);
1589			if (csum_len == 0)
1590				mptcp_update_rcv_state_g(dss_ack_rsp, tp, 0);
1591			else
1592				mptcp_update_rcv_state_g(dss_ack_rsp, tp,
1593				    *(uint16_t *)(void *)(cp +
1594				    dss_ack_rsp->mdss_copt.mdss_len -
1595				    csum_len));
1596			break;
1597		}
1598		case (MDSS_M | MDSS_A | MDSS_a):
1599		{
1600			/* 32-bit DSS + 64-bit Data ACK */
1601			struct mptcp_dss32_ack64_opt *dss32_ack64_opt;
1602			dss32_ack64_opt = (struct mptcp_dss32_ack64_opt *)cp;
1603			u_int64_t full_dsn;
1604
1605			MPTCP_DSS_OPT_SZ_CHK(
1606			    dss32_ack64_opt->mdss_copt.mdss_len,
1607			    sizeof (struct mptcp_dss32_ack64_opt) + csum_len);
1608
1609			if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
1610				printf("%s: 32-bit M and 64-bit A present.\n",
1611					__func__);
1612			}
1613			full_dack = mptcp_ntoh64(dss32_ack64_opt->mdss_ack);
1614			mptcp_do_dss_opt_ack_meat(full_dack, tp);
1615			NTOHL(dss32_ack64_opt->mdss_dsn);
1616			MPT_LOCK_SPIN(mp_tp);
1617			mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1618			MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt,
1619				dss32_ack64_opt->mdss_dsn, full_dsn);
1620			MPT_UNLOCK(mp_tp);
1621			NTOHL(dss32_ack64_opt->mdss_subflow_seqn);
1622			NTOHS(dss32_ack64_opt->mdss_data_len);
1623			if (csum_len == 0)
1624				mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1625				    dss32_ack64_opt->mdss_subflow_seqn,
1626				    dss32_ack64_opt->mdss_data_len, 0);
1627			else
1628				mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1629				    dss32_ack64_opt->mdss_subflow_seqn,
1630				    dss32_ack64_opt->mdss_data_len,
1631				    *(uint16_t *)(void *)(cp +
1632				    dss32_ack64_opt->mdss_copt.mdss_len -
1633				    csum_len));
1634			break;
1635		}
1636		case (MDSS_M | MDSS_m | MDSS_A | MDSS_a):
1637		{
1638			/* 64-bit DSS + 64-bit Data ACK */
1639			struct mptcp_dss64_ack64_opt *dss64_ack64;
1640			dss64_ack64 = (struct mptcp_dss64_ack64_opt *)cp;
1641			u_int64_t full_dsn;
1642
1643			MPTCP_DSS_OPT_SZ_CHK(dss64_ack64->mdss_copt.mdss_len,
1644			    sizeof (struct mptcp_dss64_ack64_opt) + csum_len);
1645
1646			if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
1647				printf("%s: 64-bit M and 64-bit A present.\n",
1648					__func__);
1649			}
1650			MPT_LOCK_SPIN(mp_tp);
1651			mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
1652			mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
1653			MPT_UNLOCK(mp_tp);
1654			full_dsn = mptcp_ntoh64(dss64_ack64->mdss_dsn);
1655			full_dack = mptcp_ntoh64(dss64_ack64->mdss_dsn);
1656			mptcp_do_dss_opt_ack_meat(full_dack, tp);
1657			NTOHL(dss64_ack64->mdss_subflow_seqn);
1658			NTOHS(dss64_ack64->mdss_data_len);
1659			if (csum_len == 0)
1660				mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1661				    dss64_ack64->mdss_subflow_seqn,
1662				    dss64_ack64->mdss_data_len, 0);
1663			else
1664				mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
1665				    dss64_ack64->mdss_subflow_seqn,
1666				    dss64_ack64->mdss_data_len,
1667				    *(uint16_t *)(void *)(cp +
1668				    dss64_ack64->mdss_copt.mdss_len -
1669				    csum_len));
1670			break;
1671		}
1672		default:
1673			if (mptcp_dbg >= MP_ERR_DEBUG) {
1674				printf("MPTCP ERROR %s: File bug, DSS flags = %x\n",
1675					__func__, dss_rsp->mdss_flags);
1676			}
1677			break;
1678	}
1679}
1680
1681
1682static void
1683mptcp_do_fin_opt(struct tcpcb *tp)
1684{
1685	struct mptcb *mp_tp = (struct mptcb *)tp->t_mptcb;
1686
1687	if (!(tp->t_mpflags & TMPF_RECV_DFIN)) {
1688		if (mp_tp != NULL) {
1689			MPT_LOCK(mp_tp);
1690			mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
1691			MPT_UNLOCK(mp_tp);
1692
1693			if (tp->t_inpcb->inp_socket != NULL) {
1694				soevent(tp->t_inpcb->inp_socket,
1695				    SO_FILT_HINT_LOCKED |
1696				    SO_FILT_HINT_MPCANTRCVMORE);
1697			}
1698
1699		}
1700		tp->t_mpflags |= TMPF_RECV_DFIN;
1701	}
1702
1703	tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
1704	/*
1705	 * Since this is a data level FIN, TCP needs to be explicitly told
1706	 * to send back an ACK on which the Data ACK is piggybacked.
1707	 */
1708	tp->t_flags |= TF_ACKNOW;
1709}
1710
1711static void
1712mptcp_do_dss_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen)
1713{
1714#pragma unused(th, optlen)
1715	struct mptcb *mp_tp = (struct mptcb *)tp->t_mptcb;
1716
1717	if (!mp_tp)
1718		return;
1719
1720	/* We may get Data ACKs just during fallback, so don't ignore those */
1721	if ((tp->t_mpflags & TMPF_MPTCP_TRUE) ||
1722	    (tp->t_mpflags & TMPF_TCP_FALLBACK)) {
1723		struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp;
1724
1725		if (dss_rsp->mdss_subtype == MPO_DSS) {
1726			if (mptcp_dbg > MP_VERBOSE_DEBUG_4) {
1727				printf("%s: DSS option received: %d ",
1728				    __func__, dss_rsp->mdss_flags);
1729			}
1730			if (dss_rsp->mdss_flags & MDSS_F) {
1731				if (mptcp_dbg >= MP_VERBOSE_DEBUG_1)
1732					printf("%s: received FIN\n", __func__);
1733				mptcp_do_fin_opt(tp);
1734			}
1735
1736			mptcp_do_dss_opt_meat(cp, tp);
1737		}
1738	}
1739}
1740
1741static void
1742mptcp_do_fastclose_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
1743{
1744	struct mptcb *mp_tp = NULL;
1745	struct mptcp_fastclose_opt *fc_opt = (struct mptcp_fastclose_opt *)cp;
1746
1747	if (th->th_flags != TH_ACK)
1748		return;
1749
1750	if (mptcp_dbg > MP_VERBOSE_DEBUG_2)
1751		printf("%s: received \n", __func__);
1752
1753	if (fc_opt->mfast_len != sizeof (struct mptcp_fastclose_opt)) {
1754		tcpstat.tcps_invalid_opt++;
1755		return;
1756	}
1757
1758	mp_tp = (struct mptcb *)tp->t_mptcb;
1759	if (!mp_tp)
1760		return;
1761
1762	if (fc_opt->mfast_key != mptcp_get_localkey(mp_tp)) {
1763		tcpstat.tcps_invalid_opt++;
1764		return;
1765	}
1766
1767	/*
1768	 * fastclose could make us more vulnerable to attacks, hence
1769	 * accept only those that are at the next expected sequence number.
1770	 */
1771	if (th->th_seq != tp->rcv_nxt) {
1772		tcpstat.tcps_invalid_opt++;
1773		return;
1774	}
1775
1776	MPT_LOCK(mp_tp);
1777	if (mp_tp->mpt_state != MPTCPS_FASTCLOSE_WAIT) {
1778		mp_tp->mpt_state = MPTCPS_FASTCLOSE_WAIT;
1779		DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
1780		    uint32_t, 0 /* event */);
1781		mptcp_start_timer(mp_tp, MPTT_FASTCLOSE);
1782	}
1783	MPT_UNLOCK(mp_tp);
1784
1785	/* Reset this flow */
1786	tp->t_mpflags |= TMPF_RESET;
1787
1788	if (tp->t_inpcb->inp_socket != NULL) {
1789		soevent(tp->t_inpcb->inp_socket,
1790		    SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1791	}
1792}
1793
1794
1795static void
1796mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
1797{
1798	struct mptcb *mp_tp = NULL;
1799	struct mptcp_mpfail_opt *fail_opt = (struct mptcp_mpfail_opt *)cp;
1800	u_int32_t mdss_subflow_seqn = 0;
1801	int error = 0;
1802
1803	/*
1804	 * mpfail could make us more vulnerable to attacks. Hence accept
1805	 * only those that are the next expected sequence number.
1806	 */
1807	if (th->th_seq != tp->rcv_nxt) {
1808		tcpstat.tcps_invalid_opt++;
1809		return;
1810	}
1811
1812	/* A packet without RST, must atleast have the ACK bit set */
1813	if ((th->th_flags != TH_ACK) && (th->th_flags != TH_RST))
1814		return;
1815
1816	if (fail_opt->mfail_len != sizeof (struct mptcp_mpfail_opt))
1817		return;
1818
1819	mp_tp = (struct mptcb *)tp->t_mptcb;
1820	if (mp_tp == NULL)
1821		return;
1822	MPT_LOCK(mp_tp);
1823	mp_tp->mpt_flags |= MPTCPF_RECVD_MPFAIL;
1824	mp_tp->mpt_dsn_at_csum_fail = mptcp_hton64(fail_opt->mfail_dsn);
1825	MPT_UNLOCK(mp_tp);
1826	error = mptcp_get_map_for_dsn(tp->t_inpcb->inp_socket,
1827	    mp_tp->mpt_dsn_at_csum_fail, &mdss_subflow_seqn);
1828	if (error == 0) {
1829		mp_tp->mpt_ssn_at_csum_fail = mdss_subflow_seqn;
1830	}
1831
1832	mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
1833}
1834
1835int
1836tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
1837    struct tcpopt *to, int optlen)
1838{
1839	int mptcp_subtype;
1840
1841	/* All MPTCP options have atleast 4 bytes */
1842	if (optlen < 4)
1843		return (0);
1844
1845	mptcp_subtype = (cp[2] >> 4);
1846
1847	switch (mptcp_subtype) {
1848		case MPO_CAPABLE:
1849			mptcp_do_mpcapable_opt(tp, cp, th, optlen);
1850			break;
1851		case MPO_JOIN:
1852			mptcp_do_mpjoin_opt(tp, cp, th, optlen);
1853			break;
1854		case MPO_DSS:
1855			mptcp_do_dss_opt(tp, cp, th, optlen);
1856			break;
1857		case MPO_FASTCLOSE:
1858			mptcp_do_fastclose_opt(tp, cp, th);
1859			break;
1860		case MPO_FAIL:
1861			mptcp_do_mpfail_opt(tp, cp, th);
1862			break;
1863		case MPO_ADD_ADDR:	/* fall through */
1864		case MPO_REMOVE_ADDR:	/* fall through */
1865		case MPO_PRIO:
1866			to->to_flags |= TOF_MPTCP;
1867			break;
1868		default:
1869			printf("%s: type = %d\n", __func__, mptcp_subtype);
1870			break;
1871	}
1872	return (0);
1873}
1874
1875/*
1876 * MPTCP ADD_ADDR and REMOVE_ADDR options
1877 */
1878
/*
 * ADD_ADDR is only placeholder code - not sent on wire.
 * The ADD_ADDR option is not sent on wire because of security issues
 * around connection hijacking.
 *
 * Fills in the fixed header of an ADD_ADDR option:
 *   tp  - subflow whose local address id (t_local_aid) is advertised
 *   opt - option structure to fill in; only the kind, length, subtype and
 *         address id fields are written in the compiled code
 *
 * The code that would append the address itself is compiled only with
 * MPTCP_NOTYET; it refers to `sin' and `sin6', which are not declared in
 * this function.
 */
void
mptcp_send_addaddr_opt(struct tcpcb *tp, struct mptcp_addaddr_opt *opt)
{

	opt->ma_kind = TCPOPT_MULTIPATH;
	opt->ma_len = sizeof (struct mptcp_addaddr_opt);
	opt->ma_subtype = MPO_ADD_ADDR;
	opt->ma_addr_id = tp->t_local_aid;
#ifdef MPTCP_NOTYET
	/* Append the IPv4 or IPv6 address and grow ma_len accordingly */
	struct inpcb *inp = tp->t_inpcb;
	if (inp->inp_vflag == AF_INET) {
		opt->ma_ipver = MA_IPVer_V4;
		bcopy((char *)&sin->sin_addr.s_addr, (char *)opt + opt->ma_len,
		    sizeof (in_addr_t));
		opt->ma_len += sizeof (in_addr_t);
	} else if (inp->inp_vflag == AF_INET6) {
		opt->ma_ipver = MA_IPVer_V6;
		bcopy((char *)&sin6->sin6_addr, (char *)opt + opt->ma_len,
		    sizeof (struct in6_addr));
		opt->ma_len += sizeof (struct in6_addr);
	}
#if 0
	if (tp->t_mp_port) {
		/* add ports XXX */
	}
#endif
#endif
}
1912
1913/* REMOVE_ADDR option is sent when a source address goes away */
1914void
1915mptcp_send_remaddr_opt(struct tcpcb *tp, struct mptcp_remaddr_opt *opt)
1916{
1917	if (mptcp_dbg >= MP_ERR_DEBUG)
1918		printf("%s: local id %d remove id %d \n", __func__,
1919		    tp->t_local_aid, tp->t_rem_aid);
1920
1921	bzero(opt, sizeof (*opt));
1922	opt->mr_kind = TCPOPT_MULTIPATH;
1923	opt->mr_len = sizeof (*opt);
1924	opt->mr_subtype = MPO_REMOVE_ADDR;
1925	opt->mr_addr_id = tp->t_rem_aid;
1926	tp->t_mpflags &= ~TMPF_SND_REM_ADDR;
1927}
1928
1929/*
1930 * MPTCP MP_PRIO option
1931 */
1932
1933#if 0
1934/*
1935 * Current implementation drops incoming MP_PRIO option and this code is
1936 * just a placeholder. The option is dropped because only the mobile client can
1937 * decide which of the subflows is preferred (usually wifi is preferred
1938 * over Cellular).
1939 */
/*
 * Placeholder handler for a received MP_PRIO option (this function is
 * compiled out).  It only validates its input: it returns early when tp is
 * NULL, when the subflow is not TMPF_MPTCP_TRUE, or when the option length
 * matches neither MP_PRIO layout.  No state is changed; th, optlen and bkp
 * are unused.
 */
void
mptcp_do_mpprio_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
    int optlen)
{
	int bkp = 0;
	struct mptcp_mpprio_opt *mpprio = (struct mptcp_mpprio_opt *)cp;

	if ((tp == NULL) || !(tp->t_mpflags & TMPF_MPTCP_TRUE))
		return;

	/* Accept the option with or without the address id field */
	if ((mpprio->mpprio_len != sizeof (struct mptcp_mpprio_addr_opt)) &&
	    (mpprio->mpprio_len != sizeof (struct mptcp_mpprio_opt)))
		return;
}
1954#endif
1955
1956/* We send MP_PRIO option based on the values set by the SIOCSCONNORDER ioctl */
1957static int
1958mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen)
1959{
1960	struct mptcp_mpprio_addr_opt mpprio;
1961
1962	if (tp->t_state != TCPS_ESTABLISHED) {
1963		tp->t_mpflags &= ~TMPF_SND_MPPRIO;
1964		return (optlen);
1965	}
1966
1967	if (mptcp_mpprio_enable != 1) {
1968		tp->t_mpflags &= ~TMPF_SND_MPPRIO;
1969		return (optlen);
1970	}
1971
1972	if ((MAX_TCPOPTLEN - optlen) <
1973	    (int)sizeof (mpprio))
1974		return (optlen);
1975
1976	bzero(&mpprio, sizeof (mpprio));
1977	mpprio.mpprio_kind = TCPOPT_MULTIPATH;
1978	mpprio.mpprio_len = sizeof (mpprio);
1979	mpprio.mpprio_subtype = MPO_PRIO;
1980	if (tp->t_mpflags & TMPF_BACKUP_PATH)
1981		mpprio.mpprio_flags |= MPTCP_MPPRIO_BKP;
1982	mpprio.mpprio_addrid = tp->t_local_aid;
1983	memcpy(cp + optlen, &mpprio, sizeof (mpprio));
1984	optlen += sizeof (mpprio);
1985	tp->t_mpflags &= ~TMPF_SND_MPPRIO;
1986	if (mptcp_dbg >= MP_ERR_DEBUG)
1987		printf("%s: aid = %d \n", __func__, tp->t_local_aid);
1988	return (optlen);
1989}
1990