1184902Srwatson///////////////////////////////////////////////////////////////////////////////
2184902Srwatson//
3243750Srwatson/// \file       sha256.c
4243750Srwatson/// \brief      SHA-256
5243750Srwatson///
6243750Srwatson/// \todo       Crypto++ has x86 ASM optimizations. They use SSE so if they
7243750Srwatson///             are imported to liblzma, SSE instructions need to be used
8243750Srwatson///             conditionally to keep the code working on older boxes.
9243750Srwatson//
//  This code is based on the code found in 7-Zip, which has a modified
//  version of the SHA-256 found in Crypto++ <http://www.cryptopp.com/>.
//  The code was modified a little to fit into liblzma.
13243750Srwatson//
14243750Srwatson//  Authors:    Kevin Springle
15243750Srwatson//              Wei Dai
16243750Srwatson//              Igor Pavlov
17243750Srwatson//              Lasse Collin
18243750Srwatson//
19243750Srwatson//  This file has been put into the public domain.
20243750Srwatson//  You can do whatever you want with this file.
21243750Srwatson//
22243750Srwatson///////////////////////////////////////////////////////////////////////////////
23243750Srwatson
24243750Srwatson// Avoid bogus warnings in transform().
25243750Srwatson#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __GNUC__ > 4
26243750Srwatson#	pragma GCC diagnostic ignored "-Wuninitialized"
27243750Srwatson#endif
28196031Srwatson
29196031Srwatson#include "check.h"
30196031Srwatson
// Helper macros for transform(). Several of them deliberately capture
// local names from the function that uses them: W (16-word schedule
// buffer), T (8 working variables), data (input block) and j (round base).
// Every macro argument is parenthesized so that passing an expression
// (e.g. "x | y") cannot change how the expansion associates.

// Rotate the 32-bit value num right by amount bits; amount must be in the
// range 1..31, otherwise one of the shifts is out of range.
// At least on x86, GCC is able to optimize this to a rotate instruction.
#define rotr_32(num, amount) \
	(((num) >> (amount)) | ((num) << (32 - (amount))))

// blk0 loads word i of the input block into the schedule buffer.
// blk2 updates the schedule word for position i in place, treating W[] as
// a circular buffer of the 16 most recent words.
#define blk0(i) (W[(i)] = data[(i)])
#define blk2(i) (W[(i) & 15] += s1(W[((i) - 2) & 15]) + W[((i) - 7) & 15] \
		+ s0(W[((i) - 15) & 15]))

// Bitwise "choose" (x selects between y and z) and "majority" functions.
#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define Maj(x, y, z) (((x) & (y)) | ((z) & ((x) | (y))))

// The eight working variables live in T[]. Instead of moving the values
// after every round, the mapping from name to slot is rotated by the
// round index i.
#define a(i) T[(0 - (i)) & 7]
#define b(i) T[(1 - (i)) & 7]
#define c(i) T[(2 - (i)) & 7]
#define d(i) T[(3 - (i)) & 7]
#define e(i) T[(4 - (i)) & 7]
#define f(i) T[(5 - (i)) & 7]
#define g(i) T[(6 - (i)) & 7]
#define h(i) T[(7 - (i)) & 7]

// One round. The first 16 rounds (j == 0) take the schedule word straight
// from the input block; later rounds derive it from earlier words.
// Wrapped in do/while(0) so that "R(i);" is always a single statement.
#define R(i) \
	do { \
		h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[(i) + j] \
			+ (j ? blk2(i) : blk0(i)); \
		d(i) += h(i); \
		h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)); \
	} while (0)

// S0/S1 are the rotate-only mixing functions used in the rounds;
// s0/s1 are the rotate-and-shift functions used by the message schedule.
#define S0(x) (rotr_32(x, 2) ^ rotr_32(x, 13) ^ rotr_32(x, 22))
#define S1(x) (rotr_32(x, 6) ^ rotr_32(x, 11) ^ rotr_32(x, 25))
#define s0(x) (rotr_32(x, 7) ^ rotr_32(x, 18) ^ ((x) >> 3))
#define s1(x) (rotr_32(x, 17) ^ rotr_32(x, 19) ^ ((x) >> 10))
60191273Srwatson
61189279Srwatson
// SHA-256 round constants. The R() macro reads entry (i + j) for the round
// with index i in the 16-round group starting at j.
static const uint32_t SHA256_K[64] = {
	0x428A2F98u, 0x71374491u, 0xB5C0FBCFu, 0xE9B5DBA5u, //  0 ..  3
	0x3956C25Bu, 0x59F111F1u, 0x923F82A4u, 0xAB1C5ED5u, //  4 ..  7
	0xD807AA98u, 0x12835B01u, 0x243185BEu, 0x550C7DC3u, //  8 .. 11
	0x72BE5D74u, 0x80DEB1FEu, 0x9BDC06A7u, 0xC19BF174u, // 12 .. 15
	0xE49B69C1u, 0xEFBE4786u, 0x0FC19DC6u, 0x240CA1CCu, // 16 .. 19
	0x2DE92C6Fu, 0x4A7484AAu, 0x5CB0A9DCu, 0x76F988DAu, // 20 .. 23
	0x983E5152u, 0xA831C66Du, 0xB00327C8u, 0xBF597FC7u, // 24 .. 27
	0xC6E00BF3u, 0xD5A79147u, 0x06CA6351u, 0x14292967u, // 28 .. 31
	0x27B70A85u, 0x2E1B2138u, 0x4D2C6DFCu, 0x53380D13u, // 32 .. 35
	0x650A7354u, 0x766A0ABBu, 0x81C2C92Eu, 0x92722C85u, // 36 .. 39
	0xA2BFE8A1u, 0xA81A664Bu, 0xC24B8B70u, 0xC76C51A3u, // 40 .. 43
	0xD192E819u, 0xD6990624u, 0xF40E3585u, 0x106AA070u, // 44 .. 47
	0x19A4C116u, 0x1E376C08u, 0x2748774Cu, 0x34B0BCB5u, // 48 .. 51
	0x391C0CB3u, 0x4ED8AA4Au, 0x5B9CCA4Fu, 0x682E6FF3u, // 52 .. 55
	0x748F82EEu, 0x78A5636Fu, 0x84C87814u, 0x8CC70208u, // 56 .. 59
	0x90BEFFFAu, 0xA4506CEBu, 0xBEF9A3F7u, 0xC67178F2u, // 60 .. 63
};
80187214Srwatson
81187214Srwatson
// Mix one 16-word input block into the eight-word hash state.
//
// state: the running hash value; updated in place.
// data:  the 16 words of the block, already in host byte order.
//
// NOTE: the local names W, T and j (and the parameter name data) are
// referenced directly by the R()/blk0()/blk2()/a()..h() macros, so they
// must keep exactly these names.
static void
transform(uint32_t state[8], const uint32_t data[16])
{
	uint32_t W[16]; // message schedule, filled by blk0() when j == 0
	uint32_t T[8];  // working variables

	// Start the working variables from the current hash state.
	for (unsigned int k = 0; k < 8; ++k)
		T[k] = state[k];

	// 64 rounds in four passes of 16 unrolled rounds each; j is the
	// index of the first round of the current pass.
	for (unsigned int j = 0; j < 64; j += 16) {
		R( 0); R( 1); R( 2); R( 3); R( 4); R( 5); R( 6); R( 7);
		R( 8); R( 9); R(10); R(11); R(12); R(13); R(14); R(15);
	}

	// Fold the working variables back into the state. With a round
	// index of zero, a()..h() name T[0]..T[7] in order, so this is a
	// plain element-wise addition.
	for (unsigned int k = 0; k < 8; ++k)
		state[k] += T[k];
}
109186647Srwatson
110186647Srwatson
111186647Srwatsonstatic void
112186647Srwatsonprocess(lzma_check_state *check)
113186647Srwatson{
114186647Srwatson#ifdef WORDS_BIGENDIAN
115186647Srwatson	transform(check->state.sha256.state, check->buffer.u32);
116186647Srwatson
117186647Srwatson#else
118186647Srwatson	uint32_t data[16];
119186647Srwatson
120186647Srwatson	for (size_t i = 0; i < 16; ++i)
121186647Srwatson		data[i] = bswap32(check->buffer.u32[i]);
122186647Srwatson
123186647Srwatson	transform(check->state.sha256.state, data);
124186647Srwatson#endif
125186647Srwatson
126186647Srwatson	return;
127186647Srwatson}
128186647Srwatson
129186647Srwatson
130186647Srwatsonextern void
131186647Srwatsonlzma_sha256_init(lzma_check_state *check)
132186647Srwatson{
133186647Srwatson	static const uint32_t s[8] = {
134186647Srwatson		0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
135186647Srwatson		0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
136186647Srwatson	};
137186647Srwatson
138186647Srwatson	memcpy(check->state.sha256.state, s, sizeof(s));
139186647Srwatson	check->state.sha256.size = 0;
140186647Srwatson
141186647Srwatson	return;
142186647Srwatson}
143186647Srwatson
144184902Srwatson
145184902Srwatsonextern void
146184902Srwatsonlzma_sha256_update(const uint8_t *buf, size_t size, lzma_check_state *check)
147184902Srwatson{
148184902Srwatson	// Copy the input data into a properly aligned temporary buffer.
149184902Srwatson	// This way we can be called with arbitrarily sized buffers
150184902Srwatson	// (no need to be multiple of 64 bytes), and the code works also
151184902Srwatson	// on architectures that don't allow unaligned memory access.
152184902Srwatson	while (size > 0) {
153184902Srwatson		const size_t copy_start = check->state.sha256.size & 0x3F;
154184902Srwatson		size_t copy_size = 64 - copy_start;
155184902Srwatson		if (copy_size > size)
156184902Srwatson			copy_size = size;
157184902Srwatson
158184902Srwatson		memcpy(check->buffer.u8 + copy_start, buf, copy_size);
159184902Srwatson
160184902Srwatson		buf += copy_size;
161184902Srwatson		size -= copy_size;
162184902Srwatson		check->state.sha256.size += copy_size;
163184902Srwatson
164184902Srwatson		if ((check->state.sha256.size & 0x3F) == 0)
165184902Srwatson			process(check);
166184902Srwatson	}
167184902Srwatson
168184902Srwatson	return;
169184902Srwatson}
170184902Srwatson
171184902Srwatson
172184902Srwatsonextern void
173184902Srwatsonlzma_sha256_finish(lzma_check_state *check)
174184902Srwatson{
175184902Srwatson	// Add padding as described in RFC 3174 (it describes SHA-1 but
176184902Srwatson	// the same padding style is used for SHA-256 too).
177184902Srwatson	size_t pos = check->state.sha256.size & 0x3F;
178184902Srwatson	check->buffer.u8[pos++] = 0x80;
179184902Srwatson
180184902Srwatson	while (pos != 64 - 8) {
181184902Srwatson		if (pos == 64) {
182184902Srwatson			process(check);
183184902Srwatson			pos = 0;
184184902Srwatson		}
185184902Srwatson
186184902Srwatson		check->buffer.u8[pos++] = 0x00;
187184902Srwatson	}
188184902Srwatson
189184902Srwatson	// Convert the message size from bytes to bits.
190184902Srwatson	check->state.sha256.size *= 8;
191184902Srwatson
192184902Srwatson	check->buffer.u64[(64 - 8) / 8] = conv64be(check->state.sha256.size);
193184902Srwatson
194184902Srwatson	process(check);
195184902Srwatson
196184902Srwatson	for (size_t i = 0; i < 8; ++i)
197184902Srwatson		check->buffer.u32[i] = conv32be(check->state.sha256.state[i]);
198184902Srwatson
199184902Srwatson	return;
200184902Srwatson}
201184902Srwatson