1/*
2 * Argon2 reference source code package - reference C implementations
3 *
4 * Copyright 2015
5 * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
6 *
7 * You may use this work under the terms of a Creative Commons CC0 1.0
8 * License/Waiver or the Apache Public License 2.0, at your option. The terms of
9 * these licenses can be found at:
10 *
11 * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
12 * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * You should have received a copy of both of these licenses along with this
15 * software. If not, they may be obtained at the above URLs.
16 */
17
18#include <stdint.h>
19#include <string.h>
20#include <stdlib.h>
21
22#include "argon2.h"
23#include "core.h"
24
25#include "blake2/blake2.h"
26#include "blake2/blamka-round-opt.h"
27
28/*
29 * Function fills a new memory block and optionally XORs the old block over the new one.
30 * Memory must be initialized.
31 * @param state Pointer to the just produced block. Content will be updated(!)
32 * @param ref_block Pointer to the reference block
33 * @param next_block Pointer to the block to be XORed over. May coincide with @ref_block
34 * @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
35 * @pre all block pointers must be valid
36 */
37#if defined(__AVX512F__)
38static void fill_block(__m512i *state, const block *ref_block,
39                       block *next_block, int with_xor) {
40    __m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK];
41    unsigned int i;
42
43    if (with_xor) {
44        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
45            state[i] = _mm512_xor_si512(
46                state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
47            block_XY[i] = _mm512_xor_si512(
48                state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i));
49        }
50    } else {
51        for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
52            block_XY[i] = state[i] = _mm512_xor_si512(
53                state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
54        }
55    }
56
57    for (i = 0; i < 2; ++i) {
58        BLAKE2_ROUND_1(
59            state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3],
60            state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]);
61    }
62
63    for (i = 0; i < 2; ++i) {
64        BLAKE2_ROUND_2(
65            state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i],
66            state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]);
67    }
68
69    for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
70        state[i] = _mm512_xor_si512(state[i], block_XY[i]);
71        _mm512_storeu_si512((__m512i *)next_block->v + i, state[i]);
72    }
73}
74#elif defined(__AVX2__)
75static void fill_block(__m256i *state, const block *ref_block,
76                       block *next_block, int with_xor) {
77    __m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
78    unsigned int i;
79
80    if (with_xor) {
81        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
82            state[i] = _mm256_xor_si256(
83                state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
84            block_XY[i] = _mm256_xor_si256(
85                state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
86        }
87    } else {
88        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
89            block_XY[i] = state[i] = _mm256_xor_si256(
90                state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
91        }
92    }
93
94    for (i = 0; i < 4; ++i) {
95        BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
96                       state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
97    }
98
99    for (i = 0; i < 4; ++i) {
100        BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
101                       state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
102    }
103
104    for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
105        state[i] = _mm256_xor_si256(state[i], block_XY[i]);
106        _mm256_storeu_si256((__m256i *)next_block->v + i, state[i]);
107    }
108}
109#else
110static void fill_block(__m128i *state, const block *ref_block,
111                       block *next_block, int with_xor) {
112    __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
113    unsigned int i;
114
115    if (with_xor) {
116        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
117            state[i] = _mm_xor_si128(
118                state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
119            block_XY[i] = _mm_xor_si128(
120                state[i], _mm_loadu_si128((const __m128i *)next_block->v + i));
121        }
122    } else {
123        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
124            block_XY[i] = state[i] = _mm_xor_si128(
125                state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
126        }
127    }
128
129    for (i = 0; i < 8; ++i) {
130        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
131            state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
132            state[8 * i + 6], state[8 * i + 7]);
133    }
134
135    for (i = 0; i < 8; ++i) {
136        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
137            state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
138            state[8 * 6 + i], state[8 * 7 + i]);
139    }
140
141    for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
142        state[i] = _mm_xor_si128(state[i], block_XY[i]);
143        _mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
144    }
145}
146#endif
147
148static void next_addresses(block *address_block, block *input_block) {
149    /*Temporary zero-initialized blocks*/
150#if defined(__AVX512F__)
151    __m512i zero_block[ARGON2_512BIT_WORDS_IN_BLOCK];
152    __m512i zero2_block[ARGON2_512BIT_WORDS_IN_BLOCK];
153#elif defined(__AVX2__)
154    __m256i zero_block[ARGON2_HWORDS_IN_BLOCK];
155    __m256i zero2_block[ARGON2_HWORDS_IN_BLOCK];
156#else
157    __m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
158    __m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];
159#endif
160
161    memset(zero_block, 0, sizeof(zero_block));
162    memset(zero2_block, 0, sizeof(zero2_block));
163
164    /*Increasing index counter*/
165    input_block->v[6]++;
166
167    /*First iteration of G*/
168    fill_block(zero_block, input_block, address_block, 0);
169
170    /*Second iteration of G*/
171    fill_block(zero2_block, address_block, address_block, 0);
172}
173
174void fill_segment(const argon2_instance_t *instance,
175                  argon2_position_t position) {
176    block *ref_block = NULL, *curr_block = NULL;
177    block address_block, input_block;
178    uint64_t pseudo_rand, ref_index, ref_lane;
179    uint32_t prev_offset, curr_offset;
180    uint32_t starting_index, i;
181#if defined(__AVX512F__)
182    __m512i state[ARGON2_512BIT_WORDS_IN_BLOCK];
183#elif defined(__AVX2__)
184    __m256i state[ARGON2_HWORDS_IN_BLOCK];
185#else
186    __m128i state[ARGON2_OWORDS_IN_BLOCK];
187#endif
188    int data_independent_addressing;
189
190    if (instance == NULL) {
191        return;
192    }
193
194    data_independent_addressing =
195        (instance->type == Argon2_i) ||
196        (instance->type == Argon2_id && (position.pass == 0) &&
197         (position.slice < ARGON2_SYNC_POINTS / 2));
198
199    if (data_independent_addressing) {
200        init_block_value(&input_block, 0);
201
202        input_block.v[0] = position.pass;
203        input_block.v[1] = position.lane;
204        input_block.v[2] = position.slice;
205        input_block.v[3] = instance->memory_blocks;
206        input_block.v[4] = instance->passes;
207        input_block.v[5] = instance->type;
208    }
209
210    starting_index = 0;
211
212    if ((0 == position.pass) && (0 == position.slice)) {
213        starting_index = 2; /* we have already generated the first two blocks */
214
215        /* Don't forget to generate the first block of addresses: */
216        if (data_independent_addressing) {
217            next_addresses(&address_block, &input_block);
218        }
219    }
220
221    /* Offset of the current block */
222    curr_offset = position.lane * instance->lane_length +
223                  position.slice * instance->segment_length + starting_index;
224
225    if (0 == curr_offset % instance->lane_length) {
226        /* Last block in this lane */
227        prev_offset = curr_offset + instance->lane_length - 1;
228    } else {
229        /* Previous block */
230        prev_offset = curr_offset - 1;
231    }
232
233    memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
234
235    for (i = starting_index; i < instance->segment_length;
236         ++i, ++curr_offset, ++prev_offset) {
237        /*1.1 Rotating prev_offset if needed */
238        if (curr_offset % instance->lane_length == 1) {
239            prev_offset = curr_offset - 1;
240        }
241
242        /* 1.2 Computing the index of the reference block */
243        /* 1.2.1 Taking pseudo-random value from the previous block */
244        if (data_independent_addressing) {
245            if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
246                next_addresses(&address_block, &input_block);
247            }
248            pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
249        } else {
250            pseudo_rand = instance->memory[prev_offset].v[0];
251        }
252
253        /* 1.2.2 Computing the lane of the reference block */
254        ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
255
256        if ((position.pass == 0) && (position.slice == 0)) {
257            /* Can not reference other lanes yet */
258            ref_lane = position.lane;
259        }
260
261        /* 1.2.3 Computing the number of possible reference block within the
262         * lane.
263         */
264        position.index = i;
265        ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
266                                ref_lane == position.lane);
267
268        /* 2 Creating a new block */
269        ref_block =
270            instance->memory + instance->lane_length * ref_lane + ref_index;
271        curr_block = instance->memory + curr_offset;
272        if (ARGON2_VERSION_10 == instance->version) {
273            /* version 1.2.1 and earlier: overwrite, not XOR */
274            fill_block(state, ref_block, curr_block, 0);
275        } else {
276            if(0 == position.pass) {
277                fill_block(state, ref_block, curr_block, 0);
278            } else {
279                fill_block(state, ref_block, curr_block, 1);
280            }
281        }
282    }
283}
284