/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988, 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_cksum.c	8.1 (Berkeley) 6/10/93
 */

#include <sys/param.h>
#include <machine/endian.h>
#include <sys/mbuf.h>
#include <kern/debug.h>
#include <net/dlil.h>
#include <netinet/in.h>
#define	_IP_VHL
#include <netinet/ip.h>
#include <netinet/ip_var.h>

/*
 * Checksum routine for Internet Protocol family headers (Portable Version).
 *
 * This routine is very heavily used in the network code and should be
 * modified for each CPU to be as fast as possible.
 */
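
/*
 * The Internet checksum (RFC 1071) is the 1's complement of the 16-bit
 * 1's complement sum of the data.  Because 1's complement addition is
 * associative and commutative, the sum may be accumulated in a wider
 * register and folded down afterwards.  REDUCE16 below performs that
 * folding: the 64-bit sum is viewed as four 16-bit halves via q_util,
 * added into a 32-bit value, split once more via l_util, and the final
 * end-around carry is folded in by ADDCARRY() from <netinet/ip_var.h>.
 * For example, a wide sum of 0x1fffe folds to 0x0001 + 0xfffe = 0xffff;
 * the end-around carry is what makes the result independent of how the
 * wide sum was split.
 */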
#define REDUCE16 {							  \
	q_util.q = sum;							  \
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1];				  \
	ADDCARRY(sum);							  \
}

union l_util {
	uint16_t s[2];
	uint32_t l;
};

union q_util {
	uint16_t s[4];
	uint32_t l[2];
	uint64_t q;
};

#define	PREDICT_FALSE(_exp)	__builtin_expect((_exp), 0)

static uint16_t in_cksumdata(const void *buf, int len);

/*
 * Portable version of the 16-bit 1's complement sum function, operating
 * on a contiguous buffer.  It is used mainly where the caller knows the
 * buffer layout up front, e.g. for IP header checksum calculation, though
 * it works on any contiguous data span.  The platform-specific
 * cpu_in_cksum() routine may be better optimized, so prefer it for large
 * data spans.
 *
 * The logic is borrowed from <bsd/netinet/cpu_in_cksum.c>
 */

#if ULONG_MAX == 0xffffffffUL
/* 32-bit version */
static uint16_t
in_cksumdata(const void *buf, int mlen)
{
	uint32_t sum, partial;
	unsigned int final_acc;
	uint8_t *data = (void *)(uintptr_t)buf;	/* cast away const */
	boolean_t needs_swap, started_on_odd;

	VERIFY(mlen >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;

	sum = 0;
	partial = 0;

	if ((uintptr_t)data & 1) {
		/* Align on word boundary */
		started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
		partial = *data << 8;
#else
		partial = *data;
#endif
		++data;
		--mlen;
	}
	needs_swap = started_on_odd;
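	/*
	 * If the buffer began on an odd address, each 16-bit word read
	 * below pairs bytes one position off from where they sit in the
	 * packet, i.e. every word is byte-swapped relative to the data.
	 * The sum is taken anyway, and the accumulated partial is rotated
	 * by 8 bits before folding (the needs_swap fixups below); this
	 * yields the same result because byte-swapping commutes with the
	 * 1's complement sum (RFC 1071).
	 */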
	while (mlen >= 32) {
		__builtin_prefetch(data + 32);
		partial += *(uint16_t *)(void *)data;
		partial += *(uint16_t *)(void *)(data + 2);
		partial += *(uint16_t *)(void *)(data + 4);
		partial += *(uint16_t *)(void *)(data + 6);
		partial += *(uint16_t *)(void *)(data + 8);
		partial += *(uint16_t *)(void *)(data + 10);
		partial += *(uint16_t *)(void *)(data + 12);
		partial += *(uint16_t *)(void *)(data + 14);
		partial += *(uint16_t *)(void *)(data + 16);
		partial += *(uint16_t *)(void *)(data + 18);
		partial += *(uint16_t *)(void *)(data + 20);
		partial += *(uint16_t *)(void *)(data + 22);
		partial += *(uint16_t *)(void *)(data + 24);
		partial += *(uint16_t *)(void *)(data + 26);
		partial += *(uint16_t *)(void *)(data + 28);
		partial += *(uint16_t *)(void *)(data + 30);
		data += 32;
		mlen -= 32;
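		/*
		 * Each pass above adds 16 16-bit words, i.e. at most
		 * about 2^20 per pass.  Fold partial into sum before
		 * the 32-bit accumulator can overflow: once the top
		 * two bits are set there is no longer headroom for
		 * another pass.
		 */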
		if (PREDICT_FALSE(partial & 0xc0000000)) {
			if (needs_swap)
				partial = (partial << 8) +
				    (partial >> 24);
			sum += (partial >> 16);
			sum += (partial & 0xffff);
			partial = 0;
		}
	}
	if (mlen & 16) {
		partial += *(uint16_t *)(void *)data;
		partial += *(uint16_t *)(void *)(data + 2);
		partial += *(uint16_t *)(void *)(data + 4);
		partial += *(uint16_t *)(void *)(data + 6);
		partial += *(uint16_t *)(void *)(data + 8);
		partial += *(uint16_t *)(void *)(data + 10);
		partial += *(uint16_t *)(void *)(data + 12);
		partial += *(uint16_t *)(void *)(data + 14);
		data += 16;
		mlen -= 16;
	}
	/*
	 * mlen is not updated below this point, as the remaining
	 * tests use bit masks and do not need the decremented value.
	 */
	if (mlen & 8) {
		partial += *(uint16_t *)(void *)data;
		partial += *(uint16_t *)(void *)(data + 2);
		partial += *(uint16_t *)(void *)(data + 4);
		partial += *(uint16_t *)(void *)(data + 6);
		data += 8;
	}
	if (mlen & 4) {
		partial += *(uint16_t *)(void *)data;
		partial += *(uint16_t *)(void *)(data + 2);
		data += 4;
	}
	if (mlen & 2) {
		partial += *(uint16_t *)(void *)data;
		data += 2;
	}
	if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
		partial += *data;
#else
		partial += *data << 8;
#endif
		started_on_odd = !started_on_odd;
	}

	if (needs_swap)
		partial = (partial << 8) + (partial >> 24);
	sum += (partial >> 16) + (partial & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);

	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);

	return (final_acc);
}

#else
/* 64-bit version */
static uint16_t
in_cksumdata(const void *buf, int mlen)
{
	uint64_t sum, partial;
	unsigned int final_acc;
	uint8_t *data = (void *)(uintptr_t)buf;	/* cast away const */
	boolean_t needs_swap, started_on_odd;

	VERIFY(mlen >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;

	sum = 0;
	partial = 0;

	if ((uintptr_t)data & 1) {
		/* Align on word boundary */
		started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
		partial = *data << 8;
#else
		partial = *data;
#endif
		++data;
		--mlen;
	}
	needs_swap = started_on_odd;
	if ((uintptr_t)data & 2) {
		if (mlen < 2)
			goto trailing_bytes;
		partial += *(uint16_t *)(void *)data;
		data += 2;
		mlen -= 2;
	}
	while (mlen >= 64) {
		__builtin_prefetch(data + 32);
		__builtin_prefetch(data + 64);
		partial += *(uint32_t *)(void *)data;
		partial += *(uint32_t *)(void *)(data + 4);
		partial += *(uint32_t *)(void *)(data + 8);
		partial += *(uint32_t *)(void *)(data + 12);
		partial += *(uint32_t *)(void *)(data + 16);
		partial += *(uint32_t *)(void *)(data + 20);
		partial += *(uint32_t *)(void *)(data + 24);
		partial += *(uint32_t *)(void *)(data + 28);
		partial += *(uint32_t *)(void *)(data + 32);
		partial += *(uint32_t *)(void *)(data + 36);
		partial += *(uint32_t *)(void *)(data + 40);
		partial += *(uint32_t *)(void *)(data + 44);
		partial += *(uint32_t *)(void *)(data + 48);
		partial += *(uint32_t *)(void *)(data + 52);
		partial += *(uint32_t *)(void *)(data + 56);
		partial += *(uint32_t *)(void *)(data + 60);
		data += 64;
		mlen -= 64;
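		/*
		 * Same overflow guard as in the 32-bit version: each
		 * pass above adds 16 32-bit words, i.e. less than 2^36
		 * per pass, so fold partial into sum once the top two
		 * bits of the 64-bit accumulator become set.
		 */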
		if (PREDICT_FALSE(partial & (3ULL << 62))) {
			if (needs_swap)
				partial = (partial << 8) +
				    (partial >> 56);
			sum += (partial >> 32);
			sum += (partial & 0xffffffff);
			partial = 0;
		}
	}
	/*
	 * mlen is not updated below this point, as the remaining
	 * tests use bit masks and do not need the decremented value.
	 */
	if (mlen & 32) {
		partial += *(uint32_t *)(void *)data;
		partial += *(uint32_t *)(void *)(data + 4);
		partial += *(uint32_t *)(void *)(data + 8);
		partial += *(uint32_t *)(void *)(data + 12);
		partial += *(uint32_t *)(void *)(data + 16);
		partial += *(uint32_t *)(void *)(data + 20);
		partial += *(uint32_t *)(void *)(data + 24);
		partial += *(uint32_t *)(void *)(data + 28);
		data += 32;
	}
	if (mlen & 16) {
		partial += *(uint32_t *)(void *)data;
		partial += *(uint32_t *)(void *)(data + 4);
		partial += *(uint32_t *)(void *)(data + 8);
		partial += *(uint32_t *)(void *)(data + 12);
		data += 16;
	}
	if (mlen & 8) {
		partial += *(uint32_t *)(void *)data;
		partial += *(uint32_t *)(void *)(data + 4);
		data += 8;
	}
	if (mlen & 4) {
		partial += *(uint32_t *)(void *)data;
		data += 4;
	}
	if (mlen & 2) {
		partial += *(uint16_t *)(void *)data;
		data += 2;
	}
trailing_bytes:
	if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
		partial += *data;
#else
		partial += *data << 8;
#endif
		started_on_odd = !started_on_odd;
	}

	if (needs_swap)
		partial = (partial << 8) + (partial >> 56);
	sum += (partial >> 32) + (partial & 0xffffffff);
	sum = (sum >> 32) + (sum & 0xffffffff);

	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);

	return (final_acc);
}
#endif /* ULONG_MAX != 0xffffffffUL */

/*
 * Perform 16-bit 1's complement sum on a contiguous span.
 */
uint16_t
b_sum16(const void *buf, int len)
{
	return (in_cksumdata(buf, len));
}
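
/*
 * Example usage (a sketch, not part of this file's API): validating the
 * header checksum of a contiguous IPv4 header "ip", which must sum to
 * 0xffff (complement 0) because the sum covers the header's own ip_sum
 * field:
 *
 *	valid = ((~b_sum16(ip, IP_VHL_HL(ip->ip_vhl) << 2) & 0xffff) == 0);
 */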

uint16_t inet_cksum_simple(struct mbuf *, int);
/*
 * Simple wrapper around inet_cksum(), kept for the exported _in_cksum
 * symbol in the BSDKernel symbol set.
 */
uint16_t
inet_cksum_simple(struct mbuf *m, int len)
{
	return (inet_cksum(m, 0, 0, len));
}

uint16_t
in_addword(uint16_t a, uint16_t b)
{
	uint64_t sum = a + b;

	ADDCARRY(sum);
	return (sum);
}

uint16_t
in_pseudo(uint32_t a, uint32_t b, uint32_t c)
{
	uint64_t sum;
	union q_util q_util;
	union l_util l_util;

	sum = (uint64_t)a + b + c;
	REDUCE16;
	return (sum);
}
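
/*
 * Example (a sketch mirroring the inet_cksum() code below): the IPv4
 * pseudo-header contribution for a TCP segment of tlen bytes is
 *
 *	in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 *	    htonl(tlen + IPPROTO_TCP));
 *
 * The addresses are already in network byte order; the length/protocol
 * word is built in host order and converted with htonl().
 */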

uint16_t
in_pseudo64(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t sum;
	union q_util q_util;
	union l_util l_util;

	sum = a + b + c;
	REDUCE16;
	return (sum);
}

/*
 * May be used on an IP header with options.
 */
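/*
 * IP_VHL_HL() yields the header length in 32-bit words, so shifting
 * left by 2 converts it to bytes; e.g. an option-less header has an
 * IP_VHL_HL() of 5, giving 5 << 2 == 20 bytes.
 */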
uint16_t
in_cksum_hdr_opt(const struct ip *ip)
{
	return (~b_sum16(ip, (IP_VHL_HL(ip->ip_vhl) << 2)) & 0xffff);
}

/*
 * A wrapper around the simple in_cksum_hdr() and the more complicated
 * inet_cksum(); the former is chosen if the IP header is option-less,
 * contiguous and 32-bit aligned.  Also updates the software checksum
 * statistics.
 */
uint16_t
ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out)
{
	struct ip *ip = mtod(m, struct ip *);

	if (out) {
		ipstat.ips_snd_swcsum++;
		ipstat.ips_snd_swcsum_bytes += hlen;
	} else {
		ipstat.ips_rcv_swcsum++;
		ipstat.ips_rcv_swcsum_bytes += hlen;
	}

	if (hlen == sizeof (*ip) &&
	    m->m_len >= sizeof (*ip) && IP_HDR_ALIGNED_P(ip))
		return (in_cksum_hdr(ip));

	return (inet_cksum(m, 0, 0, hlen));
}


/*
 * m MUST contain at least an IP header if nxt is specified;
 * nxt is the upper layer protocol number;
 * off is the offset at which the TCP/UDP/ICMP header starts;
 * len is the total length of the transport segment (e.g. TCP header
 * plus TCP payload)
 */
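/*
 * The pseudo-header summed below (RFC 1071 / RFC 793) covers the source
 * and destination addresses, a zero byte, the protocol number and the
 * transport length.  Because the 1's complement sum is simply a sum of
 * 16-bit words with carries folded back in, the zero/protocol/length
 * fields can be contributed as the single 32-bit word htonl(len + nxt)
 * rather than summed field by field.
 */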
uint16_t
inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len)
{
	uint32_t sum;

	sum = m_sum16(m, off, len);

	/* include pseudo header checksum? */
	if (nxt != 0) {
		struct ip *ip;
		unsigned char buf[sizeof (*ip)] __attribute__((aligned(8)));
		uint32_t mlen;

		/*
		 * Sanity check
		 *
		 * Use m_length2() instead of m_length(), as we cannot rely
		 * on the caller having set m_pkthdr.len correctly when the
		 * mbuf is an M_PKTHDR one.
		 */
		if ((mlen = m_length2(m, NULL)) < sizeof (*ip)) {
			panic("%s: mbuf %p too short (%u) for IPv4 header",
			    __func__, m, mlen);
			/* NOTREACHED */
		}

		/*
		 * In case the IP header is not contiguous, or not 32-bit
		 * aligned, copy it to a local buffer.  Note here that we
		 * expect the data pointer to point to the IP header.
		 */
		if ((sizeof (*ip) > m->m_len) ||
		    !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) {
			m_copydata(m, 0, sizeof (*ip), (caddr_t)buf);
			ip = (struct ip *)(void *)buf;
		} else {
			ip = (struct ip *)(void *)(m->m_data);
		}

		/* add pseudo header checksum */
		sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htonl(len + nxt));

		/* fold in carry bits */
		ADDCARRY(sum);
	}

	return (~sum & 0xffff);
}
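
/*
 * Example (a hypothetical caller, not part of this file): checksumming a
 * received TCP segment whose IP header occupies the first hlen bytes of
 * the mbuf chain, with tlen being the TCP header plus payload length:
 *
 *	if (inet_cksum(m, IPPROTO_TCP, hlen, tlen) != 0)
 *		goto drop;	(bad TCP checksum)
 *
 * A result of 0 means the segment's checksum field, summed together with
 * the data and the pseudo-header, verified correctly.
 */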