/*
 * Argon2 source code package
 *
 * Written by Daniel Dinu and Dmitry Khovratovich, 2015
 *
 * This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
 *
 * You should have received a copy of the CC0 Public Domain Dedication along
 * with
 * this software. If not, see
 * <http://creativecommons.org/publicdomain/zero/1.0/>.
 */

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "argon2-core.h"
#include "argon2.h"
#include "private/common.h"
#include "private/sse2_64_32.h"

/* This translation unit is only built when both the SSE2 and SSSE3 intrinsic
 * headers are available; selection of this code path at runtime happens in
 * the caller, not here. */
#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)

# ifdef __GNUC__
#  pragma GCC target("sse2")
#  pragma GCC target("ssse3")
# endif

# ifdef _MSC_VER
#  include <intrin.h> /* for _mm_set_epi64x */
# endif
# include <emmintrin.h>
# include <tmmintrin.h>

# include "blamka-round-ssse3.h"

/*
 * Argon2 compression function G, "overwrite" variant:
 *
 *   next_block = P(state ^ ref_block) ^ (state ^ ref_block)
 *
 * where P is the permutation provided by BLAKE2_ROUND, applied to the block
 * viewed as an 8x8 matrix of 128-bit lanes — first one round per row, then
 * one round per column.
 *
 * On entry `state` holds the previous block; on return it also holds the
 * newly produced block, so the caller can reuse it as the "previous block"
 * of the next iteration without reloading from memory.
 */
static void
fill_block(__m128i *state, const uint8_t *ref_block, uint8_t *next_block)
{
    __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
    uint32_t i;

    /* state ^= ref_block; keep a copy for the feed-forward XOR below */
    for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
        block_XY[i] = state[i] = _mm_xor_si128(
            state[i], _mm_loadu_si128((__m128i const *) (&ref_block[16 * i])));
    }

    /* One round over each row of the 8x8 matrix of 128-bit lanes */
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
                     state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
                     state[8 * i + 6], state[8 * i + 7]);
    }

    /* One round over each column */
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
                     state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
                     state[8 * 6 + i], state[8 * 7 + i]);
    }

    /* Feed-forward: XOR the pre-permutation value back in, then store the
     * result to next_block (unaligned store — next_block is a byte buffer) */
    for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
        state[i] = _mm_xor_si128(state[i], block_XY[i]);
        _mm_storeu_si128((__m128i *) (&next_block[16 * i]), state[i]);
    }
}

/*
 * Argon2 compression function G, "XOR" variant:
 *
 *   next_block ^= P(state ^ ref_block) ^ (state ^ ref_block)
 *
 * Identical to fill_block() except that the previous contents of next_block
 * are folded into the feed-forward value, so the result is XORed over the
 * destination instead of overwriting it. Used by fill_segment_ssse3() on
 * passes after the first, and by generate_addresses().
 */
static void
fill_block_with_xor(__m128i *state, const uint8_t *ref_block,
                    uint8_t *next_block)
{
    __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
    uint32_t i;

    /* state ^= ref_block; the saved copy additionally XORs in the current
     * next_block so the final feed-forward also merges the old contents */
    for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
        state[i] = _mm_xor_si128(
            state[i], _mm_loadu_si128((__m128i const *) (&ref_block[16 * i])));
        block_XY[i] = _mm_xor_si128(
            state[i], _mm_loadu_si128((__m128i const *) (&next_block[16 * i])));
    }

    /* One round over each row of the 8x8 matrix of 128-bit lanes */
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
                     state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
                     state[8 * i + 6], state[8 * i + 7]);
    }

    /* One round over each column */
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
                     state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
                     state[8 * 6 + i], state[8 * 7 + i]);
    }

    /* Feed-forward (includes old next_block contents), then store */
    for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
        state[i] = _mm_xor_si128(state[i], block_XY[i]);
        _mm_storeu_si128((__m128i *) (&next_block[16 * i]), state[i]);
    }
}

/*
 * Precompute the pseudo-random values used for data-independent addressing.
 *
 * Fills pseudo_rands[0 .. segment_length-1] with one 64-bit value per block
 * position in the segment. An input block is seeded with the position
 * (pass, lane, slice) and the instance parameters (memory_blocks, passes,
 * type); every ARGON2_ADDRESSES_IN_BLOCK positions the per-segment counter
 * in v[6] is incremented and a fresh address block is derived by applying
 * G twice to the input block (G(0, G(0, input)), both via the XOR variant
 * starting from zeroed state).
 *
 * pseudo_rands must have room for instance->segment_length entries.
 * NULL instance/position leaves pseudo_rands untouched.
 */
static void
generate_addresses(const argon2_instance_t *instance,
                   const argon2_position_t *position, uint64_t *pseudo_rands)
{
    block    address_block, input_block, tmp_block;
    uint32_t i;

    init_block_value(&address_block, 0);
    init_block_value(&input_block, 0);

    if (instance != NULL && position != NULL) {
        /* Seed the input block with the position and instance parameters */
        input_block.v[0] = position->pass;
        input_block.v[1] = position->lane;
        input_block.v[2] = position->slice;
        input_block.v[3] = instance->memory_blocks;
        input_block.v[4] = instance->passes;
        input_block.v[5] = instance->type;

        for (i = 0; i < instance->segment_length; ++i) {
            /* Derive a new address block whenever the previous one is
             * exhausted (every ARGON2_ADDRESSES_IN_BLOCK positions) */
            if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
                /* Temporary zero-initialized blocks: G is computed with a
                 * zero "previous" state here */
                __m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
                __m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];

                memset(zero_block, 0, sizeof(zero_block));
                memset(zero2_block, 0, sizeof(zero2_block));
                init_block_value(&address_block, 0);
                init_block_value(&tmp_block, 0);
                /* Increasing index counter */
                input_block.v[6]++;
                /* First iteration of G */
                fill_block_with_xor(zero_block, (uint8_t *) &input_block.v,
                                    (uint8_t *) &tmp_block.v);
                /* Second iteration of G */
                fill_block_with_xor(zero2_block, (uint8_t *) &tmp_block.v,
                                    (uint8_t *) &address_block.v);
            }

            pseudo_rands[i] = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
        }
    }
}

/*
 * Fill one segment (one slice of one lane) of the Argon2 memory region,
 * SSSE3 code path.
 *
 * For each block position: pick a 64-bit pseudo-random value — precomputed
 * via generate_addresses() when addressing is data-independent, otherwise
 * read from the previous block (data-dependent) — derive the reference
 * lane/index from it, and compress (previous block, reference block) into
 * the current block. On the first pass the block is overwritten; on later
 * passes the result is XORed over the existing block contents.
 *
 * For Argon2_id, addressing is data-independent only during the first half
 * of the first pass (pass 0, slice < ARGON2_SYNC_POINTS / 2) and
 * data-dependent afterwards; for other types handled here the
 * data-independent default applies.
 */
void
fill_segment_ssse3(const argon2_instance_t *instance,
                   argon2_position_t position)
{
    block   *ref_block = NULL, *curr_block = NULL;
    uint64_t pseudo_rand, ref_index, ref_lane;
    uint32_t prev_offset, curr_offset;
    uint32_t starting_index, i;
    __m128i  state[ARGON2_OWORDS_IN_BLOCK];
    int      data_independent_addressing = 1;

    /* Pseudo-random values that determine the reference block position */
    uint64_t *pseudo_rands = NULL;

    if (instance == NULL) {
        return;
    }

    /* Argon2id hybrid rule: switch to data-dependent addressing after the
     * first half of the first pass */
    if (instance->type == Argon2_id &&
        (position.pass != 0 || position.slice >= ARGON2_SYNC_POINTS / 2)) {
        data_independent_addressing = 0;
    }

    pseudo_rands = instance->pseudo_rands;

    if (data_independent_addressing) {
        generate_addresses(instance, &position, pseudo_rands);
    }

    starting_index = 0;

    if ((0 == position.pass) && (0 == position.slice)) {
        starting_index = 2; /* we have already generated the first two blocks */
    }

    /* Offset of the current block */
    curr_offset = position.lane * instance->lane_length +
                  position.slice * instance->segment_length + starting_index;

    if (0 == curr_offset % instance->lane_length) {
        /* Last block in this lane (the lane wraps around) */
        prev_offset = curr_offset + instance->lane_length - 1;
    } else {
        /* Previous block */
        prev_offset = curr_offset - 1;
    }

    /* Load the previous block into the SIMD working state */
    memcpy(state, ((instance->region->memory + prev_offset)->v),
           ARGON2_BLOCK_SIZE);

    for (i = starting_index; i < instance->segment_length;
         ++i, ++curr_offset, ++prev_offset) {
        /* 1.1 Rotating prev_offset if needed: when curr_offset enters a new
         * lane row, prev_offset must not point into the previous lane */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }

        /* 1.2 Computing the index of the reference block */
        /* 1.2.1 Taking pseudo-random value from the previous block */
        if (data_independent_addressing) {
            /* MSVC /analyze pragmas: suppress a false-positive buffer
             * over-read warning (6385) on pseudo_rands */
#pragma warning(push)
#pragma warning(disable : 6385)
            pseudo_rand = pseudo_rands[i];
#pragma warning(pop)
        } else {
            pseudo_rand = instance->region->memory[prev_offset].v[0];
        }

        /* 1.2.2 Computing the lane of the reference block from the high
         * 32 bits of the pseudo-random value */
        ref_lane = ((pseudo_rand >> 32)) % instance->lanes;

        if ((position.pass == 0) && (position.slice == 0)) {
            /* Can not reference other lanes yet */
            ref_lane = position.lane;
        }

        /* 1.2.3 Computing the number of possible reference block within the
         * lane; the low 32 bits select the block via index_alpha()
         */
        position.index = i;
        ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
                                ref_lane == position.lane);

        /* 2 Creating a new block */
        ref_block = instance->region->memory +
                    instance->lane_length * ref_lane + ref_index;
        curr_block = instance->region->memory + curr_offset;
        if (position.pass != 0) {
            /* Later passes XOR the result over the existing block */
            fill_block_with_xor(state, (uint8_t *) ref_block->v,
                                (uint8_t *) curr_block->v);
        } else {
            /* First pass: the block is written for the first time */
            fill_block(state, (uint8_t *) ref_block->v,
                       (uint8_t *) curr_block->v);
        }
    }
}
#endif