1/*
2 * Copyright (c) 2017-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11#include "seqgen.h"
12#include "mem.h"
13#include <string.h>
14
15#define MIN(a, b)  ((a) < (b) ? (a) : (b))
16
17static const size_t kMatchBytes = 128;
18
19#define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
20static BYTE SEQ_randByte(U32* src)
21{
22    static const U32 prime1 = 2654435761U;
23    static const U32 prime2 = 2246822519U;
24    U32 rand32 = *src;
25    rand32 *= prime1;
26    rand32 ^= prime2;
27    rand32  = SEQ_rotl32(rand32, 13);
28    *src = rand32;
29    return (BYTE)(rand32 >> 5);
30}
31
32SEQ_stream SEQ_initStream(unsigned seed)
33{
34    SEQ_stream stream;
35    stream.state = 0;
36    XXH64_reset(&stream.xxh, 0);
37    stream.seed = seed;
38    return stream;
39}
40
41/* Generates a single guard byte, then match length + 1 of a different byte,
42 * then another guard byte.
43 */
44static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
45                                  SEQ_outBuffer* out)
46{
47    typedef enum {
48        ml_first_byte = 0,
49        ml_match_bytes,
50        ml_last_byte,
51    } ml_state;
52    BYTE* const ostart = (BYTE*)out->dst;
53    BYTE* const oend = ostart + out->size;
54    BYTE* op = ostart + out->pos;
55
56    switch ((ml_state)stream->state) {
57    case ml_first_byte:
58        /* Generate a single byte and pick a different byte for the match */
59        if (op >= oend) {
60            stream->bytesLeft = 1;
61            break;
62        }
63        *op = SEQ_randByte(&stream->seed) & 0xFF;
64        do {
65            stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
66        } while (*op == stream->saved);
67        ++op;
68        /* State transition */
69        stream->state = ml_match_bytes;
70        stream->bytesLeft = value + 1;
71    /* fall-through */
72    case ml_match_bytes: {
73        /* Copy matchLength + 1 bytes to the output buffer */
74        size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
75        if (setLength > 0) {
76            memset(op, stream->saved, setLength);
77            op += setLength;
78            stream->bytesLeft -= setLength;
79        }
80        if (stream->bytesLeft > 0)
81            break;
82        /* State transition */
83        stream->state = ml_last_byte;
84    }
85    /* fall-through */
86    case ml_last_byte:
87        /* Generate a single byte and pick a different byte for the match */
88        if (op >= oend) {
89            stream->bytesLeft = 1;
90            break;
91        }
92        do {
93            *op = SEQ_randByte(&stream->seed) & 0xFF;
94        } while (*op == stream->saved);
95        ++op;
96        /* State transition */
97    /* fall-through */
98    default:
99        stream->state = 0;
100        stream->bytesLeft = 0;
101        break;
102    }
103    XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
104    out->pos = op - ostart;
105    return stream->bytesLeft;
106}
107
108/* Saves the current seed then generates kMatchBytes random bytes >= 128.
109 * Generates literal length - kMatchBytes random bytes < 128.
110 * Generates another kMatchBytes using the saved seed to generate a match.
111 * This way the match is easy to find for the compressors.
112 */
113static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
114{
115    typedef enum {
116        ll_start = 0,
117        ll_run_bytes,
118        ll_literals,
119        ll_run_match,
120    } ll_state;
121    BYTE* const ostart = (BYTE*)out->dst;
122    BYTE* const oend = ostart + out->size;
123    BYTE* op = ostart + out->pos;
124
125    switch ((ll_state)stream->state) {
126    case ll_start:
127        stream->state = ll_run_bytes;
128        stream->saved = stream->seed;
129        stream->bytesLeft = MIN(kMatchBytes, value);
130    /* fall-through */
131    case ll_run_bytes:
132        while (stream->bytesLeft > 0 && op < oend) {
133            *op++ = SEQ_randByte(&stream->seed) | 0x80;
134            --stream->bytesLeft;
135        }
136        if (stream->bytesLeft > 0)
137            break;
138        /* State transition */
139        stream->state = ll_literals;
140        stream->bytesLeft = value - MIN(kMatchBytes, value);
141    /* fall-through */
142    case ll_literals:
143        while (stream->bytesLeft > 0 && op < oend) {
144            *op++ = SEQ_randByte(&stream->seed) & 0x7F;
145            --stream->bytesLeft;
146        }
147        if (stream->bytesLeft > 0)
148            break;
149        /* State transition */
150        stream->state = ll_run_match;
151        stream->bytesLeft = MIN(kMatchBytes, value);
152    /* fall-through */
153    case ll_run_match: {
154        while (stream->bytesLeft > 0 && op < oend) {
155            *op++ = SEQ_randByte(&stream->saved) | 0x80;
156            --stream->bytesLeft;
157        }
158        if (stream->bytesLeft > 0)
159            break;
160    }
161    /* fall-through */
162    default:
163        stream->state = 0;
164        stream->bytesLeft = 0;
165        break;
166    }
167    XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
168    out->pos = op - ostart;
169    return stream->bytesLeft;
170}
171
172/* Saves the current seed then generates kMatchBytes random bytes >= 128.
173 * Generates offset - kMatchBytes of zeros to get a large offset without
174 * polluting the hash tables.
175 * Generates another kMatchBytes using the saved seed to generate a with the
176 * required offset.
177 */
178static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
179{
180    typedef enum {
181        of_start = 0,
182        of_run_bytes,
183        of_offset,
184        of_run_match,
185    } of_state;
186    BYTE* const ostart = (BYTE*)out->dst;
187    BYTE* const oend = ostart + out->size;
188    BYTE* op = ostart + out->pos;
189
190    switch ((of_state)stream->state) {
191    case of_start:
192        stream->state = of_run_bytes;
193        stream->saved = stream->seed;
194        stream->bytesLeft = MIN(value, kMatchBytes);
195    /* fall-through */
196    case of_run_bytes: {
197        while (stream->bytesLeft > 0 && op < oend) {
198            *op++ = SEQ_randByte(&stream->seed) | 0x80;
199            --stream->bytesLeft;
200        }
201        if (stream->bytesLeft > 0)
202            break;
203        /* State transition */
204        stream->state = of_offset;
205        stream->bytesLeft = value - MIN(value, kMatchBytes);
206    }
207    /* fall-through */
208    case of_offset: {
209        /* Copy matchLength + 1 bytes to the output buffer */
210        size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
211        if (setLength > 0) {
212            memset(op, 0, setLength);
213            op += setLength;
214            stream->bytesLeft -= setLength;
215        }
216        if (stream->bytesLeft > 0)
217            break;
218        /* State transition */
219        stream->state = of_run_match;
220        stream->bytesLeft = MIN(value, kMatchBytes);
221    }
222    /* fall-through */
223    case of_run_match: {
224        while (stream->bytesLeft > 0 && op < oend) {
225            *op++ = SEQ_randByte(&stream->saved) | 0x80;
226            --stream->bytesLeft;
227        }
228        if (stream->bytesLeft > 0)
229            break;
230    }
231    /* fall-through */
232    default:
233        stream->state = 0;
234        stream->bytesLeft = 0;
235        break;
236    }
237    XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
238    out->pos = op - ostart;
239    return stream->bytesLeft;
240}
241
242/* Returns the number of bytes left to generate.
243 * Must pass the same type/value until it returns 0.
244 */
245size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
246{
247    switch (type) {
248        case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
249        case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
250        case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
251        case SEQ_gen_max: /* fall-through */
252        default: return 0;
253    }
254}
255
256/* Returns the xxhash of the data produced so far */
257XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
258{
259    return XXH64_digest(&stream->xxh);
260}
261