1/* 2 * Argon2 reference source code package - reference C implementations 3 * 4 * Copyright 2015 5 * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves 6 * 7 * You may use this work under the terms of a Creative Commons CC0 1.0 8 * License/Waiver or the Apache Public License 2.0, at your option. The terms of 9 * these licenses can be found at: 10 * 11 * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 12 * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * You should have received a copy of both of these licenses along with this 15 * software. If not, they may be obtained at the above URLs. 16 */ 17 18#include <stdint.h> 19#include <string.h> 20#include <stdlib.h> 21 22#include "argon2.h" 23#include "core.h" 24 25#include "blake2/blake2.h" 26#include "blake2/blamka-round-opt.h" 27 28/* 29 * Function fills a new memory block and optionally XORs the old block over the new one. 30 * Memory must be initialized. 31 * @param state Pointer to the just produced block. Content will be updated(!) 32 * @param ref_block Pointer to the reference block 33 * @param next_block Pointer to the block to be XORed over. May coincide with @ref_block 34 * @param with_xor Whether to XOR into the new block (1) or just overwrite (0) 35 * @pre all block pointers must be valid 36 */ 37#if defined(__AVX512F__) 38static void fill_block(__m512i *state, const block *ref_block, 39 block *next_block, int with_xor) { 40 __m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK]; 41 unsigned int i; 42 43 if (with_xor) { 44 for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { 45 state[i] = _mm512_xor_si512( 46 state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); 47 block_XY[i] = _mm512_xor_si512( 48 state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i)); 49 } 50 } else { 51 for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { 52 block_XY[i] = state[i] = _mm512_xor_si512( 53 state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i)); 54 } 55 } 56 57 for (i = 0; i < 2; ++i) { 58 BLAKE2_ROUND_1( 59 state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], 60 state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); 61 } 62 63 for (i = 0; i < 2; ++i) { 64 BLAKE2_ROUND_2( 65 state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i], 66 state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]); 67 } 68 69 for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) { 70 state[i] = _mm512_xor_si512(state[i], block_XY[i]); 71 _mm512_storeu_si512((__m512i *)next_block->v + i, state[i]); 72 } 73} 74#elif defined(__AVX2__) 75static void fill_block(__m256i *state, const block *ref_block, 76 block *next_block, int with_xor) { 77 __m256i block_XY[ARGON2_HWORDS_IN_BLOCK]; 78 unsigned int i; 79 80 if (with_xor) { 81 for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { 82 state[i] = _mm256_xor_si256( 83 state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); 84 block_XY[i] = _mm256_xor_si256( 85 state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i)); 86 } 87 } else { 88 for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { 89 block_XY[i] = state[i] = _mm256_xor_si256( 90 state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i)); 91 } 92 } 93 94 for (i = 0; i < 4; ++i) { 95 BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], 96 state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); 97 } 98 99 for (i = 0; i < 4; ++i) { 100 BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i], 101 state[16 + i], state[20 + i], state[24 + i], state[28 + i]); 102 } 103 104 for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) { 105 state[i] = _mm256_xor_si256(state[i], block_XY[i]); 106 _mm256_storeu_si256((__m256i *)next_block->v + i, state[i]); 107 } 108} 109#else 110static void fill_block(__m128i *state, const block *ref_block, 111 block *next_block, int with_xor) { 112 __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; 113 unsigned int i; 114 115 if (with_xor) { 116 for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { 117 state[i] = _mm_xor_si128( 118 state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); 119 block_XY[i] = _mm_xor_si128( 120 state[i], _mm_loadu_si128((const __m128i *)next_block->v + i)); 121 } 122 } else { 123 for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { 124 block_XY[i] = state[i] = _mm_xor_si128( 125 state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i)); 126 } 127 } 128 129 for (i = 0; i < 8; ++i) { 130 BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], 131 state[8 * i + 3], state[8 * i + 4], state[8 * i + 5], 132 state[8 * i + 6], state[8 * i + 7]); 133 } 134 135 for (i = 0; i < 8; ++i) { 136 BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], 137 state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i], 138 state[8 * 6 + i], state[8 * 7 + i]); 139 } 140 141 for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { 142 state[i] = _mm_xor_si128(state[i], block_XY[i]); 143 _mm_storeu_si128((__m128i *)next_block->v + i, state[i]); 144 } 145} 146#endif 147 148static void next_addresses(block *address_block, block *input_block) { 149 /*Temporary zero-initialized blocks*/ 150#if defined(__AVX512F__) 151 __m512i zero_block[ARGON2_512BIT_WORDS_IN_BLOCK]; 152 __m512i zero2_block[ARGON2_512BIT_WORDS_IN_BLOCK]; 153#elif defined(__AVX2__) 154 __m256i zero_block[ARGON2_HWORDS_IN_BLOCK]; 155 __m256i zero2_block[ARGON2_HWORDS_IN_BLOCK]; 156#else 157 __m128i zero_block[ARGON2_OWORDS_IN_BLOCK]; 158 __m128i zero2_block[ARGON2_OWORDS_IN_BLOCK]; 159#endif 160 161 memset(zero_block, 0, sizeof(zero_block)); 162 memset(zero2_block, 0, sizeof(zero2_block)); 163 164 /*Increasing index counter*/ 165 input_block->v[6]++; 166 167 /*First iteration of G*/ 168 fill_block(zero_block, input_block, address_block, 0); 169 170 /*Second iteration of G*/ 171 fill_block(zero2_block, address_block, address_block, 0); 172} 173 174void fill_segment(const argon2_instance_t *instance, 175 argon2_position_t position) { 176 block *ref_block = NULL, *curr_block = NULL; 177 block address_block, input_block; 178 uint64_t pseudo_rand, ref_index, ref_lane; 179 uint32_t prev_offset, curr_offset; 180 uint32_t starting_index, i; 181#if defined(__AVX512F__) 182 __m512i state[ARGON2_512BIT_WORDS_IN_BLOCK]; 183#elif defined(__AVX2__) 184 __m256i state[ARGON2_HWORDS_IN_BLOCK]; 185#else 186 __m128i state[ARGON2_OWORDS_IN_BLOCK]; 187#endif 188 int data_independent_addressing; 189 190 if (instance == NULL) { 191 return; 192 } 193 194 data_independent_addressing = 195 (instance->type == Argon2_i) || 196 (instance->type == Argon2_id && (position.pass == 0) && 197 (position.slice < ARGON2_SYNC_POINTS / 2)); 198 199 if (data_independent_addressing) { 200 init_block_value(&input_block, 0); 201 202 input_block.v[0] = position.pass; 203 input_block.v[1] = position.lane; 204 input_block.v[2] = position.slice; 205 input_block.v[3] = instance->memory_blocks; 206 input_block.v[4] = instance->passes; 207 input_block.v[5] = instance->type; 208 } 209 210 starting_index = 0; 211 212 if ((0 == position.pass) && (0 == position.slice)) { 213 starting_index = 2; /* we have already generated the first two blocks */ 214 215 /* Don't forget to generate the first block of addresses: */ 216 if (data_independent_addressing) { 217 next_addresses(&address_block, &input_block); 218 } 219 } 220 221 /* Offset of the current block */ 222 curr_offset = position.lane * instance->lane_length + 223 position.slice * instance->segment_length + starting_index; 224 225 if (0 == curr_offset % instance->lane_length) { 226 /* Last block in this lane */ 227 prev_offset = curr_offset + instance->lane_length - 1; 228 } else { 229 /* Previous block */ 230 prev_offset = curr_offset - 1; 231 } 232 233 memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); 234 235 for (i = starting_index; i < instance->segment_length; 236 ++i, ++curr_offset, ++prev_offset) { 237 /*1.1 Rotating prev_offset if needed */ 238 if (curr_offset % instance->lane_length == 1) { 239 prev_offset = curr_offset - 1; 240 } 241 242 /* 1.2 Computing the index of the reference block */ 243 /* 1.2.1 Taking pseudo-random value from the previous block */ 244 if (data_independent_addressing) { 245 if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { 246 next_addresses(&address_block, &input_block); 247 } 248 pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; 249 } else { 250 pseudo_rand = instance->memory[prev_offset].v[0]; 251 } 252 253 /* 1.2.2 Computing the lane of the reference block */ 254 ref_lane = ((pseudo_rand >> 32)) % instance->lanes; 255 256 if ((position.pass == 0) && (position.slice == 0)) { 257 /* Can not reference other lanes yet */ 258 ref_lane = position.lane; 259 } 260 261 /* 1.2.3 Computing the number of possible reference block within the 262 * lane. 263 */ 264 position.index = i; 265 ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, 266 ref_lane == position.lane); 267 268 /* 2 Creating a new block */ 269 ref_block = 270 instance->memory + instance->lane_length * ref_lane + ref_index; 271 curr_block = instance->memory + curr_offset; 272 if (ARGON2_VERSION_10 == instance->version) { 273 /* version 1.2.1 and earlier: overwrite, not XOR */ 274 fill_block(state, ref_block, curr_block, 0); 275 } else { 276 if(0 == position.pass) { 277 fill_block(state, ref_block, curr_block, 0); 278 } else { 279 fill_block(state, ref_block, curr_block, 1); 280 } 281 } 282 } 283} 284