1/* 2 * Copyright (c) 2017-present, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11#include "seqgen.h" 12#include "mem.h" 13#include <string.h> 14 15#define MIN(a, b) ((a) < (b) ? (a) : (b)) 16 17static const size_t kMatchBytes = 128; 18 19#define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r))) 20static BYTE SEQ_randByte(U32* src) 21{ 22 static const U32 prime1 = 2654435761U; 23 static const U32 prime2 = 2246822519U; 24 U32 rand32 = *src; 25 rand32 *= prime1; 26 rand32 ^= prime2; 27 rand32 = SEQ_rotl32(rand32, 13); 28 *src = rand32; 29 return (BYTE)(rand32 >> 5); 30} 31 32SEQ_stream SEQ_initStream(unsigned seed) 33{ 34 SEQ_stream stream; 35 stream.state = 0; 36 XXH64_reset(&stream.xxh, 0); 37 stream.seed = seed; 38 return stream; 39} 40 41/* Generates a single guard byte, then match length + 1 of a different byte, 42 * then another guard byte. 43 */ 44static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value, 45 SEQ_outBuffer* out) 46{ 47 typedef enum { 48 ml_first_byte = 0, 49 ml_match_bytes, 50 ml_last_byte, 51 } ml_state; 52 BYTE* const ostart = (BYTE*)out->dst; 53 BYTE* const oend = ostart + out->size; 54 BYTE* op = ostart + out->pos; 55 56 switch ((ml_state)stream->state) { 57 case ml_first_byte: 58 /* Generate a single byte and pick a different byte for the match */ 59 if (op >= oend) { 60 stream->bytesLeft = 1; 61 break; 62 } 63 *op = SEQ_randByte(&stream->seed) & 0xFF; 64 do { 65 stream->saved = SEQ_randByte(&stream->seed) & 0xFF; 66 } while (*op == stream->saved); 67 ++op; 68 /* State transition */ 69 stream->state = ml_match_bytes; 70 stream->bytesLeft = value + 1; 71 /* fall-through */ 72 case ml_match_bytes: { 73 /* Copy matchLength + 1 bytes to the output buffer */ 74 size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op)); 75 if (setLength > 0) { 76 memset(op, stream->saved, setLength); 77 op += setLength; 78 stream->bytesLeft -= setLength; 79 } 80 if (stream->bytesLeft > 0) 81 break; 82 /* State transition */ 83 stream->state = ml_last_byte; 84 } 85 /* fall-through */ 86 case ml_last_byte: 87 /* Generate a single byte and pick a different byte for the match */ 88 if (op >= oend) { 89 stream->bytesLeft = 1; 90 break; 91 } 92 do { 93 *op = SEQ_randByte(&stream->seed) & 0xFF; 94 } while (*op == stream->saved); 95 ++op; 96 /* State transition */ 97 /* fall-through */ 98 default: 99 stream->state = 0; 100 stream->bytesLeft = 0; 101 break; 102 } 103 XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); 104 out->pos = op - ostart; 105 return stream->bytesLeft; 106} 107 108/* Saves the current seed then generates kMatchBytes random bytes >= 128. 109 * Generates literal length - kMatchBytes random bytes < 128. 110 * Generates another kMatchBytes using the saved seed to generate a match. 111 * This way the match is easy to find for the compressors. 112 */ 113static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out) 114{ 115 typedef enum { 116 ll_start = 0, 117 ll_run_bytes, 118 ll_literals, 119 ll_run_match, 120 } ll_state; 121 BYTE* const ostart = (BYTE*)out->dst; 122 BYTE* const oend = ostart + out->size; 123 BYTE* op = ostart + out->pos; 124 125 switch ((ll_state)stream->state) { 126 case ll_start: 127 stream->state = ll_run_bytes; 128 stream->saved = stream->seed; 129 stream->bytesLeft = MIN(kMatchBytes, value); 130 /* fall-through */ 131 case ll_run_bytes: 132 while (stream->bytesLeft > 0 && op < oend) { 133 *op++ = SEQ_randByte(&stream->seed) | 0x80; 134 --stream->bytesLeft; 135 } 136 if (stream->bytesLeft > 0) 137 break; 138 /* State transition */ 139 stream->state = ll_literals; 140 stream->bytesLeft = value - MIN(kMatchBytes, value); 141 /* fall-through */ 142 case ll_literals: 143 while (stream->bytesLeft > 0 && op < oend) { 144 *op++ = SEQ_randByte(&stream->seed) & 0x7F; 145 --stream->bytesLeft; 146 } 147 if (stream->bytesLeft > 0) 148 break; 149 /* State transition */ 150 stream->state = ll_run_match; 151 stream->bytesLeft = MIN(kMatchBytes, value); 152 /* fall-through */ 153 case ll_run_match: { 154 while (stream->bytesLeft > 0 && op < oend) { 155 *op++ = SEQ_randByte(&stream->saved) | 0x80; 156 --stream->bytesLeft; 157 } 158 if (stream->bytesLeft > 0) 159 break; 160 } 161 /* fall-through */ 162 default: 163 stream->state = 0; 164 stream->bytesLeft = 0; 165 break; 166 } 167 XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); 168 out->pos = op - ostart; 169 return stream->bytesLeft; 170} 171 172/* Saves the current seed then generates kMatchBytes random bytes >= 128. 173 * Generates offset - kMatchBytes of zeros to get a large offset without 174 * polluting the hash tables. 175 * Generates another kMatchBytes using the saved seed to generate a with the 176 * required offset. 177 */ 178static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out) 179{ 180 typedef enum { 181 of_start = 0, 182 of_run_bytes, 183 of_offset, 184 of_run_match, 185 } of_state; 186 BYTE* const ostart = (BYTE*)out->dst; 187 BYTE* const oend = ostart + out->size; 188 BYTE* op = ostart + out->pos; 189 190 switch ((of_state)stream->state) { 191 case of_start: 192 stream->state = of_run_bytes; 193 stream->saved = stream->seed; 194 stream->bytesLeft = MIN(value, kMatchBytes); 195 /* fall-through */ 196 case of_run_bytes: { 197 while (stream->bytesLeft > 0 && op < oend) { 198 *op++ = SEQ_randByte(&stream->seed) | 0x80; 199 --stream->bytesLeft; 200 } 201 if (stream->bytesLeft > 0) 202 break; 203 /* State transition */ 204 stream->state = of_offset; 205 stream->bytesLeft = value - MIN(value, kMatchBytes); 206 } 207 /* fall-through */ 208 case of_offset: { 209 /* Copy matchLength + 1 bytes to the output buffer */ 210 size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op)); 211 if (setLength > 0) { 212 memset(op, 0, setLength); 213 op += setLength; 214 stream->bytesLeft -= setLength; 215 } 216 if (stream->bytesLeft > 0) 217 break; 218 /* State transition */ 219 stream->state = of_run_match; 220 stream->bytesLeft = MIN(value, kMatchBytes); 221 } 222 /* fall-through */ 223 case of_run_match: { 224 while (stream->bytesLeft > 0 && op < oend) { 225 *op++ = SEQ_randByte(&stream->saved) | 0x80; 226 --stream->bytesLeft; 227 } 228 if (stream->bytesLeft > 0) 229 break; 230 } 231 /* fall-through */ 232 default: 233 stream->state = 0; 234 stream->bytesLeft = 0; 235 break; 236 } 237 XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos); 238 out->pos = op - ostart; 239 return stream->bytesLeft; 240} 241 242/* Returns the number of bytes left to generate. 243 * Must pass the same type/value until it returns 0. 244 */ 245size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out) 246{ 247 switch (type) { 248 case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out); 249 case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out); 250 case SEQ_gen_of: return SEQ_gen_offset(stream, value, out); 251 case SEQ_gen_max: /* fall-through */ 252 default: return 0; 253 } 254} 255 256/* Returns the xxhash of the data produced so far */ 257XXH64_hash_t SEQ_digest(SEQ_stream const* stream) 258{ 259 return XXH64_digest(&stream->xxh); 260} 261