index.h revision 299742
1/* index.h : interface to FSFS indexing functionality
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#ifndef SVN_LIBSVN_FS__INDEX_H
24#define SVN_LIBSVN_FS__INDEX_H
25
26#include "fs.h"
27#include "rev_file.h"
28
29/* Pre-defined item index values.  They are used to identify empty or
30 * mandatory items.
31 */
32#define SVN_FS_FS__ITEM_INDEX_UNUSED     0  /* invalid / reserved value */
33#define SVN_FS_FS__ITEM_INDEX_CHANGES    1  /* list of changed paths */
34#define SVN_FS_FS__ITEM_INDEX_ROOT_NODE  2  /* the root noderev */
35#define SVN_FS_FS__ITEM_INDEX_FIRST_USER 3  /* first noderev to be freely
36                                               assigned */
37
38/* Data / item types as stored in the phys-to-log index.
39 */
40#define SVN_FS_FS__ITEM_TYPE_UNUSED     0  /* file section not used */
41#define SVN_FS_FS__ITEM_TYPE_FILE_REP   1  /* item is a file representation */
42#define SVN_FS_FS__ITEM_TYPE_DIR_REP    2  /* item is a directory rep. */
43#define SVN_FS_FS__ITEM_TYPE_FILE_PROPS 3  /* item is a file property rep. */
44#define SVN_FS_FS__ITEM_TYPE_DIR_PROPS  4  /* item is a directory prop rep */
45#define SVN_FS_FS__ITEM_TYPE_NODEREV    5  /* item is a noderev */
46#define SVN_FS_FS__ITEM_TYPE_CHANGES    6  /* item is a changed paths list */
47
48#define SVN_FS_FS__ITEM_TYPE_ANY_REP    7  /* item is any representation.
49                                              Only used in pre-format7. */
50
51/* Open / create a log-to-phys index file with the full file path name
52 * FILE_NAME.  Return the open file in *PROTO_INDEX allocated in
53 * RESULT_POOL.
54 */
55svn_error_t *
56svn_fs_fs__l2p_proto_index_open(apr_file_t **proto_index,
57                                const char *file_name,
58                                apr_pool_t *result_pool);
59
60/* Call this function before adding entries for the next revision to the
61 * log-to-phys index file in PROTO_INDEX.  Use SCRATCH_POOL for temporary
62 * allocations.
63 */
64svn_error_t *
65svn_fs_fs__l2p_proto_index_add_revision(apr_file_t *proto_index,
66                                        apr_pool_t *scratch_pool);
67
68/* Add a new mapping, ITEM_INDEX to OFFSET, to the log-to-phys index file
69 * in PROTO_INDEX.  Please note that mappings may be added in any order
70 * but duplicate entries for the same ITEM_INDEX are not supported.
71 * Not all possible index values need to be used.  OFFSET may be -1 to
72 * mark 'invalid' item indexes but that is already implied for all item
73 * indexes not explicitly given a mapping.
74 *
75 * Use SCRATCH_POOL for temporary allocations.
76 */
77svn_error_t *
78svn_fs_fs__l2p_proto_index_add_entry(apr_file_t *proto_index,
79                                     apr_off_t offset,
80                                     apr_uint64_t item_index,
81                                     apr_pool_t *scratch_pool);
82
83/* Use the proto index file stored at PROTO_FILE_NAME, construct the final
84 * log-to-phys index and append it to INDEX_FILE.  The first revision will
85 * be REVISION, entries to the next revision will be assigned to REVISION+1
86 * and so forth.
87 *
88 * Return the MD5 checksum of the on-disk index data in *CHECKSUM, allocated
89 * in RESULT_POOL.  Use SCRATCH_POOL for temporary allocations.
90 */
91svn_error_t *
92svn_fs_fs__l2p_index_append(svn_checksum_t **checksum,
93                            svn_fs_t *fs,
94                            apr_file_t *index_file,
95                            const char *proto_file_name,
96                            svn_revnum_t revision,
97                            apr_pool_t *result_pool,
98                            apr_pool_t *scratch_pool);
99
100/* Open / create a phys-to-log index file with the full file path name
101 * FILE_NAME.  Return the open file in *PROTO_INDEX allocated in
102 * RESULT_POOL.
103 */
104svn_error_t *
105svn_fs_fs__p2l_proto_index_open(apr_file_t **proto_index,
106                                const char *file_name,
107                                apr_pool_t *result_pool);
108
109/* Add a new mapping ENTRY to the phys-to-log index file in PROTO_INDEX.
110 * The entries must be added in ascending offset order and must not leave
111 * intermittent ranges uncovered.  The revision value in ENTRY may be
112 * SVN_INVALID_REVNUM.  Use SCRATCH_POOL for temporary allocations.
113 */
114svn_error_t *
115svn_fs_fs__p2l_proto_index_add_entry(apr_file_t *proto_index,
116                                     const svn_fs_fs__p2l_entry_t *entry,
117                                     apr_pool_t *scratch_pool);
118
119/* Set *NEXT_OFFSET to the first offset behind the last entry in the
120 * phys-to-log proto index file PROTO_INDEX.  This will be 0 for empty
121 * index files.  Use SCRATCH_POOL for temporary allocations.
122 */
123svn_error_t *
124svn_fs_fs__p2l_proto_index_next_offset(apr_off_t *next_offset,
125                                       apr_file_t *proto_index,
126                                       apr_pool_t *scratch_pool);
127
128/* Use the proto index file stored at PROTO_FILE_NAME, construct the final
129 * phys-to-log index and append it to INDEX_FILE.  Entries without a valid
130 * revision will be assigned to the REVISION given here.
131 *
132 * Return the MD5 checksum of the on-disk index data in *CHECKSUM, allocated
133 * in RESULT_POOL.  Use SCRATCH_POOL for temporary allocations.
134 */
135svn_error_t *
136svn_fs_fs__p2l_index_append(svn_checksum_t **checksum,
137                            svn_fs_t *fs,
138                            apr_file_t *index_file,
139                            const char *proto_file_name,
140                            svn_revnum_t revision,
141                            apr_pool_t *result_pool,
142                            apr_pool_t *scratch_pool);
143
144/* Use the phys-to-log mapping files in FS to build a list of entries
145 * that (at least partly) overlap with the range given by BLOCK_START
146 * offset and BLOCK_SIZE in the rep / pack file containing REVISION.
147 * Return the array in *ENTRIES with svn_fs_fs__p2l_entry_t as elements,
148 * allocated in RESULT_POOL.  REV_FILE determines whether to access single
149 * rev or pack file data.  If that is not available anymore (neither in
150 * cache nor on disk), return an error.  Use SCRATCH_POOL for temporary
151 * allocations.
152 *
153 * Note that (only) the first and the last mapping may cross a cluster
154 * boundary.
155 */
156svn_error_t *
157svn_fs_fs__p2l_index_lookup(apr_array_header_t **entries,
158                            svn_fs_t *fs,
159                            svn_fs_fs__revision_file_t *rev_file,
160                            svn_revnum_t revision,
161                            apr_off_t block_start,
162                            apr_off_t block_size,
163                            apr_pool_t *result_pool,
164                            apr_pool_t *scratch_pool);
165
166/* Use the phys-to-log mapping files in FS to return the entry for the
167 * item starting at global OFFSET in the rep file containing REVISION in
168 * *ENTRY, allocated in RESULT_POOL.  Sets *ENTRY to NULL if no item starts
169 * at exactly that offset.  REV_FILE determines whether to access single
170 * rev or pack file data.  If that is not available anymore (neither in
171 * cache nor on disk), return an error.  Use SCRATCH_POOL for temporary
172 * allocations.
173 */
174svn_error_t *
175svn_fs_fs__p2l_entry_lookup(svn_fs_fs__p2l_entry_t **entry,
176                            svn_fs_t *fs,
177                            svn_fs_fs__revision_file_t *rev_file,
178                            svn_revnum_t revision,
179                            apr_off_t offset,
180                            apr_pool_t *result_pool,
181                            apr_pool_t *scratch_pool);
182
183/* For ITEM_INDEX within REVISION in FS, return the position in the
184 * respective rev or pack file in *ABSOLUTE_POSITION.  If TXN_ID is not
185 * NULL, return the file offset within that transaction; REVISION should
186 * be given as SVN_INVALID_REVNUM in that case.
187 *
188 * REV_FILE determines whether to access single rev or pack file data.
189 * If that is not available anymore (neither in cache nor on disk), re-open
190 * the rev / pack file and retry to open the index file.  For anything but
191 * committed log addressed revisions, REV_FILE may be NULL.
192 * Use SCRATCH_POOL for temporary allocations.
193 */
194svn_error_t *
195svn_fs_fs__item_offset(apr_off_t *absolute_position,
196                       svn_fs_t *fs,
197                       svn_fs_fs__revision_file_t *rev_file,
198                       svn_revnum_t revision,
199                       const svn_fs_fs__id_part_t *txn_id,
200                       apr_uint64_t item_index,
201                       apr_pool_t *scratch_pool);
202
203/* Use the log-to-phys indexes in FS to determine the maximum item indexes
204 * assigned to revision START_REV to START_REV + COUNT - 1.  That is a
205 * close upper limit to the actual number of items in the respective revs.
206 * Return the results in *MAX_IDS,  allocated in RESULT_POOL.
207 * Use SCRATCH_POOL for temporary allocations.
208 */
209svn_error_t *
210svn_fs_fs__l2p_get_max_ids(apr_array_header_t **max_ids,
211                           svn_fs_t *fs,
212                           svn_revnum_t start_rev,
213                           apr_size_t count,
214                           apr_pool_t *result_pool,
215                           apr_pool_t *scratch_pool);
216
217/* In *OFFSET, return the last offset in the pack / rev file containing
218 * REVISION in FS.  REV_FILE determines whether to access single rev or
219 * pack file data.  If that is not available anymore (neither in cache nor
220 * on disk), re-open the rev / pack file and retry to open the index file.
221 * Use SCRATCH_POOL for temporary allocations.
222 */
223svn_error_t *
224svn_fs_fs__p2l_get_max_offset(apr_off_t *offset,
225                              svn_fs_t *fs,
226                              svn_fs_fs__revision_file_t *rev_file,
227                              svn_revnum_t revision,
228                              apr_pool_t *scratch_pool);
229
230/* Index (re-)creation utilities.
231 */
232
233/* For FS, create a new auto-deleting L2P proto index file and return its
234 * name in *PROTONAME.  All entries to write are given in ENTRIES and
235 * entries are of type svn_fs_fs__p2l_entry_t* (sic!).  The ENTRIES array
236 * will be reordered.  Give the proto index file the lifetime of RESULT_POOL
237 * and use SCRATCH_POOL for temporary allocations.
238 */
239svn_error_t *
240svn_fs_fs__l2p_index_from_p2l_entries(const char **protoname,
241                                      svn_fs_t *fs,
242                                      apr_array_header_t *entries,
243                                      apr_pool_t *result_pool,
244                                      apr_pool_t *scratch_pool);
245
246/* For FS, create a new auto-deleting P2L proto index file and return its
247 * name in *PROTONAME.  All entries to write are given in ENTRIES and are
248 * of type svn_fs_fs__p2l_entry_t*.  The FNV1 checksums are not taken from
249 * ENTRIES but are being calculated from the current contents of REV_FILE
250 * as we go.  Give the proto index file the lifetime of RESULT_POOL and use
251 * SCRATCH_POOL for temporary allocations.
252 */
253svn_error_t *
254svn_fs_fs__p2l_index_from_p2l_entries(const char **protoname,
255                                      svn_fs_t *fs,
256                                      svn_fs_fs__revision_file_t *rev_file,
257                                      apr_array_header_t *entries,
258                                      apr_pool_t *result_pool,
259                                      apr_pool_t *scratch_pool);
260
261/* Serialization and caching interface
262 */
263
264/* We use this key type to address individual pages from both index types
265 * (l2p and p2l). */
266typedef struct svn_fs_fs__page_cache_key_t
267{
268  /* in l2p: this is the revision of the items being mapped
269     in p2l: this is the start revision identifying the pack / rev file */
270  apr_uint32_t revision;
271
272  /* if TRUE, this is the index to a pack file
273   */
274  svn_boolean_t is_packed;
275
276  /* in l2p: page number within the revision
277   * in p2l: page number within the rev / pack file
278   */
279  apr_uint64_t page;
280} svn_fs_fs__page_cache_key_t;
281
282/*
283 * Implements svn_cache__serialize_func_t for l2p_header_t objects.
284 */
285svn_error_t *
286svn_fs_fs__serialize_l2p_header(void **data,
287                                apr_size_t *data_len,
288                                void *in,
289                                apr_pool_t *pool);
290
291/*
292 * Implements svn_cache__deserialize_func_t for l2p_header_t objects.
293 */
294svn_error_t *
295svn_fs_fs__deserialize_l2p_header(void **out,
296                                  void *data,
297                                  apr_size_t data_len,
298                                  apr_pool_t *pool);
299
300/*
301 * Implements svn_cache__serialize_func_t for l2p_page_t objects.
302 */
303svn_error_t *
304svn_fs_fs__serialize_l2p_page(void **data,
305                              apr_size_t *data_len,
306                              void *in,
307                              apr_pool_t *pool);
308
309/*
310 * Implements svn_cache__deserialize_func_t for l2p_page_t objects.
311 */
312svn_error_t *
313svn_fs_fs__deserialize_l2p_page(void **out,
314                                void *data,
315                                apr_size_t data_len,
316                                apr_pool_t *pool);
317
318/*
319 * Implements svn_cache__serialize_func_t for p2l_header_t objects.
320 */
321svn_error_t *
322svn_fs_fs__serialize_p2l_header(void **data,
323                                apr_size_t *data_len,
324                                void *in,
325                                apr_pool_t *pool);
326
327/*
328 * Implements svn_cache__deserialize_func_t for p2l_header_t objects.
329 */
330svn_error_t *
331svn_fs_fs__deserialize_p2l_header(void **out,
332                                  void *data,
333                                  apr_size_t data_len,
334                                  apr_pool_t *pool);
335
336/*
337 * Implements svn_cache__serialize_func_t for apr_array_header_t objects
338 * with elements of type svn_fs_fs__p2l_entry_t.
339 */
340svn_error_t *
341svn_fs_fs__serialize_p2l_page(void **data,
342                              apr_size_t *data_len,
343                              void *in,
344                              apr_pool_t *pool);
345
346/*
347 * Implements svn_cache__deserialize_func_t for apr_array_header_t objects
348 * with elements of type svn_fs_fs__p2l_entry_t.
349 */
350svn_error_t *
351svn_fs_fs__deserialize_p2l_page(void **out,
352                                void *data,
353                                apr_size_t data_len,
354                                apr_pool_t *pool);
355
356#endif
357