1/*
2 * Copyright (c) Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11/* zstd_ddict.c :
12 * concentrates all logic that needs to know the internals of ZSTD_DDict object */
13
14/*-*******************************************************
15*  Dependencies
16*********************************************************/
17#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18#include "../common/cpu.h"         /* bmi2 */
19#include "../common/mem.h"         /* low level memory routines */
20#define FSE_STATIC_LINKING_ONLY
21#include "../common/fse.h"
22#define HUF_STATIC_LINKING_ONLY
23#include "../common/huf.h"
24#include "zstd_decompress_internal.h"
25#include "zstd_ddict.h"
26
27
28
29
30/*-*******************************************************
31*  Types
32*********************************************************/
/* Digested decompression dictionary : the dictionary content plus the state derived from it at load time. */
struct ZSTD_DDict_s {
    void* dictBuffer;                /* internal copy of the dictionary, released by ZSTD_freeDDict(); NULL when the content is only referenced */
    const void* dictContent;         /* dictionary bytes actually used : either dictBuffer or the caller-provided buffer */
    size_t dictSize;                 /* size of dictContent, in bytes */
    ZSTD_entropyDTables_t entropy;   /* entropy tables and repcodes, filled by ZSTD_loadDEntropy() */
    U32 dictID;                      /* read from the dictionary header; stays 0 when no header is parsed */
    U32 entropyPresent;              /* 1 once `entropy` has been loaded from the dictionary, 0 otherwise */
    ZSTD_customMem cMem;             /* allocator used for dictBuffer and for the DDict itself */
};  /* typedef'd to ZSTD_DDict within "zstd.h" */
42
43const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
44{
45    assert(ddict != NULL);
46    return ddict->dictContent;
47}
48
49size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
50{
51    assert(ddict != NULL);
52    return ddict->dictSize;
53}
54
55void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
56{
57    DEBUGLOG(4, "ZSTD_copyDDictParameters");
58    assert(dctx != NULL);
59    assert(ddict != NULL);
60    dctx->dictID = ddict->dictID;
61    dctx->prefixStart = ddict->dictContent;
62    dctx->virtualStart = ddict->dictContent;
63    dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
64    dctx->previousDstEnd = dctx->dictEnd;
65#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
66    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
67    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
68#endif
69    if (ddict->entropyPresent) {
70        dctx->litEntropy = 1;
71        dctx->fseEntropy = 1;
72        dctx->LLTptr = ddict->entropy.LLTable;
73        dctx->MLTptr = ddict->entropy.MLTable;
74        dctx->OFTptr = ddict->entropy.OFTable;
75        dctx->HUFptr = ddict->entropy.hufTable;
76        dctx->entropy.rep[0] = ddict->entropy.rep[0];
77        dctx->entropy.rep[1] = ddict->entropy.rep[1];
78        dctx->entropy.rep[2] = ddict->entropy.rep[2];
79    } else {
80        dctx->litEntropy = 0;
81        dctx->fseEntropy = 0;
82    }
83}
84
85
86static size_t
87ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
88                           ZSTD_dictContentType_e dictContentType)
89{
90    ddict->dictID = 0;
91    ddict->entropyPresent = 0;
92    if (dictContentType == ZSTD_dct_rawContent) return 0;
93
94    if (ddict->dictSize < 8) {
95        if (dictContentType == ZSTD_dct_fullDict)
96            return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
97        return 0;   /* pure content mode */
98    }
99    {   U32 const magic = MEM_readLE32(ddict->dictContent);
100        if (magic != ZSTD_MAGIC_DICTIONARY) {
101            if (dictContentType == ZSTD_dct_fullDict)
102                return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
103            return 0;   /* pure content mode */
104        }
105    }
106    ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
107
108    /* load entropy tables */
109    RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
110            &ddict->entropy, ddict->dictContent, ddict->dictSize)),
111        dictionary_corrupted, "");
112    ddict->entropyPresent = 1;
113    return 0;
114}
115
116
117static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
118                                      const void* dict, size_t dictSize,
119                                      ZSTD_dictLoadMethod_e dictLoadMethod,
120                                      ZSTD_dictContentType_e dictContentType)
121{
122    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
123        ddict->dictBuffer = NULL;
124        ddict->dictContent = dict;
125        if (!dict) dictSize = 0;
126    } else {
127        void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
128        ddict->dictBuffer = internalBuffer;
129        ddict->dictContent = internalBuffer;
130        if (!internalBuffer) return ERROR(memory_allocation);
131        ZSTD_memcpy(internalBuffer, dict, dictSize);
132    }
133    ddict->dictSize = dictSize;
134    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
135
136    /* parse dictionary content */
137    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
138
139    return 0;
140}
141
142ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
143                                      ZSTD_dictLoadMethod_e dictLoadMethod,
144                                      ZSTD_dictContentType_e dictContentType,
145                                      ZSTD_customMem customMem)
146{
147    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
148
149    {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
150        if (ddict == NULL) return NULL;
151        ddict->cMem = customMem;
152        {   size_t const initResult = ZSTD_initDDict_internal(ddict,
153                                            dict, dictSize,
154                                            dictLoadMethod, dictContentType);
155            if (ZSTD_isError(initResult)) {
156                ZSTD_freeDDict(ddict);
157                return NULL;
158        }   }
159        return ddict;
160    }
161}
162
163/*! ZSTD_createDDict() :
164*   Create a digested dictionary, to start decompression without startup delay.
165*   `dict` content is copied inside DDict.
166*   Consequently, `dict` can be released after `ZSTD_DDict` creation */
167ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
168{
169    ZSTD_customMem const allocator = { NULL, NULL, NULL };
170    return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
171}
172
173/*! ZSTD_createDDict_byReference() :
174 *  Create a digested dictionary, to start decompression without startup delay.
175 *  Dictionary content is simply referenced, it will be accessed during decompression.
176 *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
177ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
178{
179    ZSTD_customMem const allocator = { NULL, NULL, NULL };
180    return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
181}
182
183
184const ZSTD_DDict* ZSTD_initStaticDDict(
185                                void* sBuffer, size_t sBufferSize,
186                                const void* dict, size_t dictSize,
187                                ZSTD_dictLoadMethod_e dictLoadMethod,
188                                ZSTD_dictContentType_e dictContentType)
189{
190    size_t const neededSpace = sizeof(ZSTD_DDict)
191                             + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
192    ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
193    assert(sBuffer != NULL);
194    assert(dict != NULL);
195    if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
196    if (sBufferSize < neededSpace) return NULL;
197    if (dictLoadMethod == ZSTD_dlm_byCopy) {
198        ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
199        dict = ddict+1;
200    }
201    if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
202                                              dict, dictSize,
203                                              ZSTD_dlm_byRef, dictContentType) ))
204        return NULL;
205    return ddict;
206}
207
208
209size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
210{
211    if (ddict==NULL) return 0;   /* support free on NULL */
212    {   ZSTD_customMem const cMem = ddict->cMem;
213        ZSTD_customFree(ddict->dictBuffer, cMem);
214        ZSTD_customFree(ddict, cMem);
215        return 0;
216    }
217}
218
219/*! ZSTD_estimateDDictSize() :
220 *  Estimate amount of memory that will be needed to create a dictionary for decompression.
221 *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
222size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
223{
224    return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
225}
226
227size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
228{
229    if (ddict==NULL) return 0;   /* support sizeof on NULL */
230    return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
231}
232
233/*! ZSTD_getDictID_fromDDict() :
234 *  Provides the dictID of the dictionary loaded into `ddict`.
235 *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
236 *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
237unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
238{
239    if (ddict==NULL) return 0;
240    return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
241}
242