1/* 2 * Copyright (c) 2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24/* CFBurstTrie.c 25 Copyright (c) 2008-2013, Apple Inc. All rights reserved. 26 Responsibility: Jennifer Moore 27*/ 28 29#include "CFInternal.h" 30#include "CFBurstTrie.h" 31#include "CFByteOrder.h" 32#include "CFNumber.h" 33#include <stdio.h> 34#include <string.h> 35#include <stdlib.h> 36#include <fcntl.h> 37#include <sys/stat.h> 38#include <limits.h> 39#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX 40#include <unistd.h> 41#include <sys/param.h> 42#include <sys/mman.h> 43#endif 44 45#include <errno.h> 46#include <assert.h> 47 48#if DEPLOYMENT_TARGET_WINDOWS 49#define open _NS_open 50#define statinfo _stat 51#define stat(x,y) _NS_stat(x,y) 52#define __builtin_memcmp(x, y, z) memcmp(x, y, z) 53#define __builtin_popcountll(x) popcountll(x) 54#define bzero(dst, size) ZeroMemory(dst, size) 55#define S_IWUSR 0 56#define S_IRUSR 0 57 58static int pwrite(int fd, const void *buf, size_t nbyte, off_t offset) { 59 // Get where we are 60 long pos = _tell(fd); 61 62 // Move to new offset 63 _lseek(fd, offset, SEEK_SET); 64 65 // Write data 66 int res = _write(fd, buf, nbyte); 67 68 // Return to previous offset 69 _lseek(fd, pos, SEEK_SET); 70 71 return res; 72} 73 74#else 75#define statinfo stat 76#endif 77 78#if 0 79#pragma mark Types and Utilities 80#endif 81 82#define MAX_STRING_ALLOCATION_SIZE 342 83#define MAX_STRING_SIZE 1024 84#define MAX_KEY_LENGTH MAX_STRING_SIZE * 4 85#define CHARACTER_SET_SIZE 256 86#define MAX_LIST_SIZE 256 // 64 87#define MAX_BITMAP_SIZE 200 88#define MAX_BUFFER_SIZE (4096<<2) 89 90#define NextTrie_GetPtr(p) (p & ((~(uintptr_t)0)-3)) 91#define NextTrie_GetKind(p) (p & 3) 92#define NextTrie_SetKind(p, kind) (p |= (3&kind)) 93 94#define DiskNextTrie_GetPtr(map,offset) (((uintptr_t)map) + (uintptr_t)(offset & ((~(uintptr_t)0)-3))) 95#define DiskNextTrie_GetKind(p) (p & 3) 96#define DiskNextTrie_SetKind(p, kind) (p |= (3&kind)) 97 98// Use this macro to avoid forgetting to check the pointer before assigning value to it. 99#define SetPayload(pointer, value) do { if (pointer) *pointer = value; } while (0) 100 101enum { Nothing = 0, TrieKind = 1, ListKind = 2, CompactTrieKind = 3 }; 102typedef enum { FailedInsert = 0, NewTerm = 1, ExistingTerm = 2 } CFBTInsertCode; 103 104#pragma pack (1) 105typedef uintptr_t NextTrie; 106 107typedef struct _TrieLevel { 108 NextTrie slots[CHARACTER_SET_SIZE]; 109 uint32_t weight; 110 uint32_t payload; 111} TrieLevel; 112typedef TrieLevel *TrieLevelRef; 113 114typedef struct _MapTrieLevel { 115 uint32_t slots[CHARACTER_SET_SIZE]; 116 uint32_t payload; 117} MapTrieLevel; 118typedef MapTrieLevel *MapTrieLevelRef; 119 120typedef struct _CompactMapTrieLevel { 121 uint64_t bitmap[CHARACTER_SET_SIZE / 64]; 122 uint32_t payload; 123 uint32_t slots[]; 124} CompactMapTrieLevel; 125typedef CompactMapTrieLevel *CompactMapTrieLevelRef; 126 127typedef struct _ListNode { 128 struct _ListNode *next; 129 uint32_t weight; 130 uint32_t payload; 131 uint16_t length; 132 UInt8 string[]; 133}* ListNodeRef; 134 135typedef struct _Page { 136 uint32_t length; 137 char data[]; 138} Page; 139 140typedef struct _PageEntryPacked { 141 uint8_t pfxLen; 142 uint16_t strlen; 143 uint32_t payload; 144 UInt8 string[]; 145} PageEntryPacked; 146 147typedef struct _PageEntry { 148 uint16_t strlen; 149 uint32_t payload; 150 UInt8 string[]; 151} PageEntry; 152 153typedef struct _TrieHeader { 154 uint32_t signature; 155 uint32_t rootOffset; 156 uint32_t count; 157 uint32_t size; 158 uint32_t flags; 159 uint64_t reserved[16]; 160} TrieHeader; 161 162typedef struct _TrieCursor { 163 uint64_t signature; 164 uint64_t counter; 165 NextTrie next; 166 uint32_t keylen; 167 uint32_t prefixlen; 168 const uint8_t *prefix; 169 uint8_t key[MAX_KEY_LENGTH]; 170} TrieCursor; 171 172typedef struct _MapCursor { 173 uint64_t signature; 174 TrieHeader *header; 175 uint32_t next; 176 uint32_t prefixlen; 177 uint32_t keylen; 178 const uint8_t *prefix; 179 uint8_t key[MAX_STRING_SIZE*4]; 180} MapCursor; 181 182typedef struct _CompactMapCursor { 183 uint32_t next; 184 uint32_t entryOffsetInPage; 185 uint32_t offsetInEntry; 186 uint32_t payload; 187 // On a page, the first entry could has 0 strlen. So we need this variable to tell us whether 188 // the cursor is merely pointing at the beginning of the page, or the first entry. 189 // For example, if the trie contains "ab" and "abc", where "a" is stored on an array level, 190 // while "b" and "bc" are stored on a page level. If we creat a cursor for string "a", this cursor 191 // will point at the beginning of the page, but not at any particular key. The both entryOffsetInPage and 192 // offsetInEntry fields of the cursor are set to 0 in this case. Now if we add "a" to the 193 // trie. the page level will actually contains three entries. The first entry corresponds to string "a". 194 // That entry has 0 strlen value. If we creat a cursor for string "a" again, this cursor will 195 // point at the first entry on the page. But the entryOffsetInPage and offsetInEntry fields are still 196 // set to 0s. So we need an additional variable to make distinction between these two situations. 197 BOOL isOnPage; 198} CompactMapCursor; 199typedef struct _CompactMapCursor *MapCursorRef; 200 201enum { 202 _kCFBurstTrieCursorTrieType = 0, 203 _kCFBurstTrieCursorMapType 204}; 205 206typedef struct _CFBurstTrieCursor { 207 CompactMapCursor mapCursor; 208 CFIndex cursorType; 209 CFBurstTrieRef trie; 210} _CFBurstTrieCursor; 211 212// ** Legacy 213typedef struct _DiskTrieLevel { 214 uint32_t slots[CHARACTER_SET_SIZE]; 215 uint32_t weight; 216 uint32_t payload; 217} DiskTrieLevel; 218typedef DiskTrieLevel *DiskTrieLevelRef; 219 220typedef struct _CompactDiskTrieLevel { 221 uint64_t bitmap[CHARACTER_SET_SIZE / 64]; // CHARACTER_SET_SIZE / 64bits per word 222 uint32_t weight; 223 uint32_t payload; 224 uint32_t slots[]; 225} CompactDiskTrieLevel; 226typedef CompactDiskTrieLevel *CompactDiskTrieLevelRef; 227 228typedef struct _StringPage { 229 uint32_t length; 230 char data[]; 231} StringPage; 232 233typedef struct _StringPageEntryPacked { 234 uint8_t pfxLen; 235 uint16_t strlen; // make uint8_t if possible 236 uint32_t payload; 237 char string[]; 238} StringPageEntryPacked; 239 240typedef struct _StringPageEntry { 241 uint16_t strlen; // make uint8_t if possible 242 uint32_t payload; 243 char string[]; 244} StringPageEntry; 245 246typedef struct _fileHeader { 247 uint32_t signature; 248 uint32_t rootOffset; 249 uint32_t count; 250 uint32_t size; 251 uint32_t flags; 252} fileHeader; 253// ** 254#pragma pack() 255 256struct _CFBurstTrie { 257 union { 258 TrieLevel root; 259 DiskTrieLevel diskRoot; 260 MapTrieLevel maproot; 261 }; 262 char *mapBase; 263 uint32_t mapSize; 264 uint32_t mapOffset; 265 uint32_t cflags; 266 uint32_t count; 267 uint32_t containerSize; 268 int retain; 269#if DEPLOYMENT_TARGET_WINDOWS 270 HANDLE mapHandle; 271 HANDLE mappedFileHandle; 272#endif 273}; 274 275#if 0 276#pragma mark - 277#pragma mark Forward declarations 278#endif 279 280typedef struct _TraverseContext { 281 void *context; 282 void (*callback)(void*, const UInt8*, uint32_t, uint32_t); 283} TraverseContext; 284 285static bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact) 286{ 287 if (context != NULL) { 288 TraverseContext *ctx = (TraverseContext *)context; 289 if (ctx->context && ctx->callback) { 290 ctx->callback(ctx->context, key, 1, payload); 291 } 292 } 293 return false; 294} 295 296void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); 297 298static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload); 299 300static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)); 301static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)); 302static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); 303static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); 304static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); 305static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); 306 307static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd); 308 309static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix); 310static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix); 311static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix); 312 313static void destroyCFBurstTrie(CFBurstTrieRef trie); 314static void finalizeCFBurstTrie(TrieLevelRef trie); 315static void finalizeCFBurstTrieList(ListNodeRef node); 316 317static int nodeWeightCompare(const void *a, const void *b); 318static int nodeStringCompare(const void *a, const void *b); 319 320static bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact); 321static bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact); 322 323static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer); 324 325static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length); 326static Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload); 327static void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination); 328static Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs); 329static void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback); 330static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload); 331static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload); 332 333CFBurstTrieRef CFBurstTrieCreateWithOptions(CFDictionaryRef options) { 334 CFBurstTrieRef trie = NULL; 335 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie)); 336 trie->containerSize = MAX_LIST_SIZE; 337 338 CFNumberRef valueAsCFNumber; 339 if (CFDictionaryGetValueIfPresent(options, kCFBurstTrieCreationOptionNameContainerSize, (const void **)&valueAsCFNumber)) { 340 int value; 341 CFNumberGetValue(valueAsCFNumber, kCFNumberIntType, &value); 342 trie->containerSize = value > 2 && value < 4096 ? value : MAX_LIST_SIZE; 343 } 344 trie->retain = 1; 345 return trie; 346} 347 348CFBurstTrieRef CFBurstTrieCreate() { 349 CFBurstTrieRef trie = NULL; 350 int listSize = MAX_LIST_SIZE; 351 CFNumberRef value = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &listSize); 352 CFMutableDictionaryRef options = CFDictionaryCreateMutable(kCFAllocatorDefault, 1, NULL, NULL); 353 CFDictionarySetValue(options, kCFBurstTrieCreationOptionNameContainerSize, value); 354 trie = CFBurstTrieCreateWithOptions(options); 355 CFRelease(value); 356 CFRelease(options); 357 return trie; 358} 359 360CFBurstTrieRef CFBurstTrieCreateFromFile(CFStringRef path) { 361 struct statinfo sb; 362 char filename[PATH_MAX]; 363 int fd; 364 365 /* Check valid path name */ 366 if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return NULL; 367 368 /* Check if file exists */ 369 if (stat(filename, &sb) != 0) return NULL; 370 371 /* Check if file can be opened */ 372 if ((fd=open(filename, CF_OPENFLGS|O_RDONLY)) < 0) return NULL; 373 374#if DEPLOYMENT_TARGET_WINDOWS 375 HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd); 376 if (!mappedFileHandle) return NULL; 377 378 HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL); 379 if (!mapHandle) return NULL; 380 381 char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, 0, sb.st_size); 382 if (!map) return NULL; 383#else 384 char *map = mmap(0, sb.st_size, PROT_READ, MAP_FILE|MAP_SHARED, fd, 0); 385#endif 386 387 CFBurstTrieRef trie = NULL; 388 TrieHeader *header = (TrieHeader *)map; 389 390 if (((uint32_t*)map)[0] == 0xbabeface) { 391 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie)); 392 trie->mapBase = map; 393 trie->mapSize = CFSwapInt32LittleToHost(sb.st_size); 394 trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset); 395 trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags); 396 trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count); 397 trie->retain = 1; 398#if DEPLOYMENT_TARGET_WINDOWS 399 trie->mappedFileHandle = mappedFileHandle; 400 trie->mapHandle = mapHandle; 401#else 402 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here. 403 close(fd); 404#endif 405 } else if (header->signature == 0xcafebabe || header->signature == 0x0ddba11) { 406 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie)); 407 trie->mapBase = map; 408 trie->mapSize = CFSwapInt32LittleToHost(sb.st_size); 409 trie->cflags = CFSwapInt32LittleToHost(header->flags); 410 trie->count = CFSwapInt32LittleToHost(header->count); 411 trie->retain = 1; 412#if DEPLOYMENT_TARGET_WINDOWS 413 trie->mappedFileHandle = mappedFileHandle; 414 trie->mapHandle = mapHandle; 415#else 416 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here. 417 close(fd); 418#endif 419 } 420 return trie; 421} 422 423CFBurstTrieRef CFBurstTrieCreateFromMapBytes(char *mapBase) { 424 CFBurstTrieRef trie = NULL; 425 TrieHeader *header = (TrieHeader *)mapBase; 426 427 if (mapBase && ((uint32_t*)mapBase)[0] == 0xbabeface) { 428 trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie)); 429 trie->mapBase = mapBase; 430 trie->mapSize = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->size); 431 trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset); 432 trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags); 433 trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count); 434 trie->retain = 1; 435 } else if (mapBase && (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) { 436 trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie)); 437 trie->mapBase = mapBase; 438 trie->mapSize = CFSwapInt32LittleToHost(header->size); 439 trie->cflags = CFSwapInt32LittleToHost(header->flags); 440 trie->count = CFSwapInt32LittleToHost(header->count); 441 trie->retain = 1; 442 } 443 return trie; 444} 445 446Boolean CFBurstTrieInsert(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex payload) { 447 return CFBurstTrieAddWithWeight(trie, term, termRange, 1, (uint32_t)payload); 448} 449 450Boolean CFBurstTrieAdd(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t payload) { 451 return CFBurstTrieAddWithWeight(trie, term, termRange, 1, payload); 452} 453 454Boolean CFBurstTrieInsertCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex payload) { 455 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, (uint32_t)payload); 456} 457 458Boolean CFBurstTrieAddCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t payload) { 459 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, payload); 460} 461 462Boolean CFBurstTrieInsertUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex payload) { 463 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, (uint32_t)payload); 464} 465 466Boolean CFBurstTrieAddUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t payload) { 467 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, payload); 468} 469 470Boolean CFBurstTrieInsertWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex weight, CFIndex payload) { 471 return CFBurstTrieAddWithWeight(trie, term, termRange, weight, (uint32_t)payload); 472} 473 474Boolean CFBurstTrieAddWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t weight, uint32_t payload) { 475 Boolean success = false; 476 CFIndex size = MAX_STRING_ALLOCATION_SIZE; 477 CFIndex bytesize = termRange.length * 4; //** 4-byte max character size 478 if (!trie->mapBase && termRange.length < MAX_STRING_SIZE && payload > 0) { 479 CFIndex length; 480 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1]; 481 UInt8 *key = buffer; 482 if (bytesize >= size) { 483 size = bytesize; 484 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); 485 } 486 CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length); 487 key[length] = 0; 488 489 success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload); 490 if (buffer != key) free(key); 491 } 492 return success; 493} 494 495Boolean CFBurstTrieInsertCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex weight, CFIndex payload) { 496 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, weight, (uint32_t)payload); 497} 498 499Boolean CFBurstTrieAddCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t weight, uint32_t payload) { 500 Boolean success = false; 501 CFIndex size = MAX_STRING_ALLOCATION_SIZE; 502 CFIndex bytesize = numChars * 4; //** 4-byte max character size 503 if (!trie->mapBase && numChars < MAX_STRING_SIZE && payload > 0) { 504 CFIndex length; 505 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1]; 506 UInt8 *key = buffer; 507 if (bytesize >= size) { 508 size = bytesize; 509 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); 510 } 511 length = burstTrieConvertCharactersToUTF8(chars, numChars, key); 512 key[length] = 0; 513 514 success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload); 515 if (buffer != key) free(key); 516 } 517 return success; 518} 519 520Boolean CFBurstTrieInsertUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex weight, CFIndex payload) { 521 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, weight, (uint32_t)payload); 522} 523 524Boolean CFBurstTrieAddUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t weight, uint32_t payload) { 525 CFBTInsertCode code = FailedInsert; 526 527 if (!trie->mapBase && numChars < MAX_STRING_SIZE*4 && payload > 0) { 528 code = addCFBurstTrieLevel(trie, &trie->root, chars, numChars, weight, payload); 529 if (code == NewTerm) trie->count++; 530 } 531 return code > FailedInsert; 532} 533 534Boolean CFBurstTrieFind(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex *payload) { 535 uint32_t p; 536 if (CFBurstTrieContains(trie, term, termRange, &p)) { 537 SetPayload(payload, p); 538 return true; 539 } 540 return false; 541} 542 543Boolean CFBurstTrieContains(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t *payload) { 544 Boolean success = false; 545 CFIndex size = MAX_STRING_ALLOCATION_SIZE; 546 CFIndex bytesize = termRange.length * 4; //** 4-byte max character size 547 if (termRange.length < MAX_STRING_SIZE) { 548 CFIndex length; 549 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE+1]; 550 UInt8 *key = buffer; 551 if (bytesize >= size) { 552 size = bytesize; 553 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); 554 } 555 CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length); 556 key[length] = 0; 557 558 success = CFBurstTrieContainsUTF8String(trie, key, length, payload); 559 if (buffer != key) free(key); 560 } 561 return success; 562} 563 564Boolean CFBurstTrieFindCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex *payload) { 565 uint32_t p; 566 if (CFBurstTrieContainsCharacters(trie, chars, numChars, &p)) { 567 SetPayload(payload, p); 568 return true; 569 } 570 return false; 571} 572 573Boolean CFBurstTrieContainsCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t *payload) { 574 Boolean success = false; 575 CFIndex size = MAX_STRING_ALLOCATION_SIZE; 576 CFIndex bytesize = numChars * 4; //** 4-byte max character size 577 if (numChars < MAX_STRING_SIZE) { 578 CFIndex length; 579 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1]; 580 UInt8 *key = buffer; 581 if (bytesize >= size) { 582 size = bytesize; 583 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); 584 } 585 length = burstTrieConvertCharactersToUTF8(chars, numChars, key); 586 key[length] = 0; 587 588 success = CFBurstTrieContainsUTF8String(trie, key, length, payload); 589 if (buffer != key) free(key); 590 } 591 return success; 592} 593 594Boolean CFBurstTrieFindUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, CFIndex *payload) { 595 uint32_t p; 596 if (CFBurstTrieContainsUTF8String(trie, key, length, &p)) { 597 SetPayload(payload, p); 598 return true; 599 } 600 return false; 601} 602 603Boolean CFBurstTrieContainsUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, uint32_t *payload) { 604 Boolean success = false; 605 if (length < MAX_STRING_SIZE) { 606 if (trie->mapBase && ((fileHeader *)trie->mapBase)->signature == 0xbabeface) { 607 bool prefix = (trie->cflags & kCFBurstTriePrefixCompression); 608 success = burstTrieMappedFind((DiskTrieLevelRef)(trie->mapBase+CFSwapInt32LittleToHost((((uint32_t*)trie->mapBase)[1]))), trie->mapBase, key, length, payload, prefix); 609 } else if (trie->mapBase && trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey)) { 610 _CFBurstTrieCursor cursor; 611 if (!CFBurstTrieSetCursorForBytes(trie, &cursor, key, length)) 612 return FALSE; 613 return CFBurstTrieCursorGetPayload(&cursor, payload); 614 } else { 615 uint32_t found = 0; 616 void *cursor = 0; 617 traverseCFBurstTrieWithCursor(trie, key, length, &cursor, true, &found, containsKey); 618 if (found) SetPayload(payload, found); 619 success = found > 0; 620 } 621 } 622 return success; 623} 624 625Boolean CFBurstTrieSerialize(CFBurstTrieRef trie, CFStringRef path, CFBurstTrieOpts opts) { 626 Boolean success = false; 627 if (trie->mapBase) { 628 return success; 629 } else { 630 int fd; 631 char filename[PATH_MAX]; 632 633 /* Check valid path name */ 634 if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return success; 635 636 /* Check if file can be opened */ 637 if ((fd=open(filename, CF_OPENFLGS|O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) < 0) return success; 638 639 if (CFBurstTrieSerializeWithFileDescriptor(trie, fd, opts)) success = true; 640 641 close(fd); 642 } 643 return success; 644} 645 646Boolean CFBurstTrieSerializeWithFileDescriptor(CFBurstTrieRef trie, int fd, CFBurstTrieOpts opts) { 647 Boolean success = false; 648 if (!trie->mapBase && fd >= 0) { 649 off_t start_offset = lseek(fd, 0, SEEK_END); 650 651 trie->cflags = opts; 652 trie->mapSize = serializeCFBurstTrie(trie, start_offset, fd); 653 654#if DEPLOYMENT_TARGET_WINDOWS 655 HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd); 656 // We need to make sure we have our own handle to keep this file open as long as the mmap lasts 657 DuplicateHandle(GetCurrentProcess(), mappedFileHandle, GetCurrentProcess(), &mappedFileHandle, 0, 0, DUPLICATE_SAME_ACCESS); 658 HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL); 659 if (!mapHandle) return NULL; 660 char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, start_offset, trie->mapSize); 661 if (!map) return NULL; 662 trie->mapBase = map; 663 trie->mapHandle = mapHandle; 664 trie->mappedFileHandle = mappedFileHandle; 665#else 666 trie->mapBase = mmap(0, trie->mapSize, PROT_READ, MAP_FILE|MAP_SHARED, fd, start_offset); 667#endif 668 success = true; 669 } 670 671 return success; 672} 673 674void CFBurstTrieTraverse(CFBurstTrieRef trie, void *ctx, void (*callback)(void*, const UInt8*, uint32_t, uint32_t)) { 675 TrieHeader *header = (TrieHeader *)trie->mapBase; 676 if (!trie->mapBase || (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) { 677 void *cursor = 0; 678 TraverseContext context; 679 context.context = ctx; 680 context.callback = callback; 681 traverseCFBurstTrieWithCursor(trie, (const uint8_t *)"", 0, &cursor, false, &context, foundKey); 682 } 683} 684 685 686void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) 687{ 688 traverseCFBurstTrieWithCursor(trie, prefix, prefixLen, cursor, false, ctx, callback); 689} 690 691void CFBurstTriePrint(CFBurstTrieRef trie) { 692 693} 694 695CFIndex CFBurstTrieGetCount(CFBurstTrieRef trie) { 696 return trie->count; 697} 698 699CFBurstTrieRef CFBurstTrieRetain(CFBurstTrieRef trie) { 700 trie->retain++; 701 return trie; 702} 703 704void CFBurstTrieRelease(CFBurstTrieRef trie) { 705 trie->retain--; 706 if (trie->retain == 0) destroyCFBurstTrie(trie); 707 return; 708} 709 710Boolean CFBurstTrieSetCursorForBytes(CFBurstTrieRef trie, CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length) 711{ 712 if (!trie->mapBase || !(trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey))) { 713 //fprintf(stderr, "CFBurstTrieCreateCursorForBytes() only support file based trie in prefix compression format.\n"); 714 return FALSE; 715 } 716 717 TrieHeader *header = (TrieHeader*)trie->mapBase; 718 if (length < 0 || !trie) 719 return FALSE; 720 721 cursor->trie = trie; 722 if (trie->mapBase) { 723 cursor->cursorType = _kCFBurstTrieCursorMapType; 724 cursor->mapCursor.next = header->rootOffset; 725 cursor->mapCursor.isOnPage = FALSE; 726 cursor->mapCursor.entryOffsetInPage = 0; 727 cursor->mapCursor.offsetInEntry = 0; 728 cursor->mapCursor.payload = 0; 729 } else 730 assert(false); 731 732 if (!bytes || length == 0) 733 return TRUE; 734 735 return CFBurstTrieCursorAdvanceForBytes(cursor, bytes, length); 736} 737 738 739CFBurstTrieCursorRef CFBurstTrieCreateCursorForBytes(CFBurstTrieRef trie, const UInt8* bytes, CFIndex length) 740{ 741 CFBurstTrieCursorRef cursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1); 742 if (!CFBurstTrieSetCursorForBytes(trie, cursor, bytes, length)) { 743 CFBurstTrieCursorRelease(cursor); 744 return NULL; 745 } 746 return cursor; 747} 748 749CFBurstTrieCursorRef CFBurstTrieCursorCreateByCopy(CFBurstTrieCursorRef cursor) 750{ 751 if (!cursor) 752 return NULL; 753 754 CFBurstTrieCursorRef newCursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1); 755 switch (cursor->cursorType) { 756 case _kCFBurstTrieCursorMapType: 757 copyMapCursor(&cursor->mapCursor, &newCursor->mapCursor); 758 break; 759 case _kCFBurstTrieCursorTrieType: 760 assert(false); 761 break; 762 } 763 newCursor->cursorType = cursor->cursorType; 764 newCursor->trie = cursor->trie; 765 return newCursor; 766} 767 768Boolean CFBurstTrieCursorIsEqual(CFBurstTrieCursorRef lhs, CFBurstTrieCursorRef rhs) 769{ 770 if (lhs->trie != rhs->trie || lhs->cursorType != rhs->cursorType) 771 return FALSE; 772 773 if (lhs->cursorType == _kCFBurstTrieCursorMapType) 774 return areMapCursorsEqual(&lhs->mapCursor, &rhs->mapCursor); 775 else 776 return FALSE; 777} 778 779Boolean CFBurstTrieCursorAdvanceForBytes(CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length) 780{ 781 switch (cursor->cursorType) { 782 case _kCFBurstTrieCursorMapType: { 783 CompactMapCursor tempCursor; 784 copyMapCursor(&cursor->mapCursor, &tempCursor); 785 if (advanceMapCursor(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, bytes, length)) 786 return TRUE; 787 else { 788 copyMapCursor(&tempCursor, &cursor->mapCursor); 789 return FALSE; 790 } 791 } 792 case _kCFBurstTrieCursorTrieType: 793 return FALSE; 794 } 795 return FALSE; 796} 797 798Boolean CFBurstTrieCursorGetPayload(CFBurstTrieCursorRef cursor, uint32_t *payload) 799{ 800 switch (cursor->cursorType) { 801 case _kCFBurstTrieCursorMapType: 802 return getMapCursorPayload(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, payload); 803 case _kCFBurstTrieCursorTrieType: 804 return FALSE; 805 } 806 return FALSE; 807} 808 809void CFBurstTrieCursorRelease(CFBurstTrieCursorRef cursor) 810{ 811 if (!cursor) 812 return; 813 free(cursor); 814} 815 816void CFBurstTrieTraverseFromCursor(CFBurstTrieCursorRef cursor, void *ctx, CFBurstTrieTraversalCallback callback) 817{ 818 if (!cursor) 819 return; 820 821 UInt8 *bytes = (UInt8*)calloc(1, MAX_KEY_LENGTH); 822 uint32_t capacity = MAX_KEY_LENGTH; 823 uint32_t length = 0; 824 Boolean stop = FALSE; 825 switch (cursor->cursorType) { 826 case _kCFBurstTrieCursorMapType: { 827 CompactMapCursor tempCursor; 828 copyMapCursor(&cursor->mapCursor, &tempCursor); 829 traverseFromMapCursor(cursor->trie, &tempCursor, bytes, capacity,length, &stop, ctx, callback); 830 break; 831 } 832 case _kCFBurstTrieCursorTrieType: 833 break; 834 } 835 free(bytes); 836} 837 838#if 0 839#pragma mark - 840#pragma mark Insertion 841#endif 842 843static ListNodeRef makeCFBurstTrieListNode(const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) { 844 ListNodeRef node = (ListNodeRef) calloc(1, sizeof(*node) + keylen + 1); 845 memcpy(node->string, key, keylen); 846 node->string[keylen] = 0; 847 node->next = 0; 848 node->length = keylen; 849 node->weight = weight; 850 node->payload = payload; 851 return node; 852} 853 854static void addCFBurstTrieBurstLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) { 855 if (keylen) { 856 NextTrie next = root->slots[*key]; 857 ListNodeRef newNode = makeCFBurstTrieListNode(key+1, keylen-1, weight, payload); 858 newNode->weight = weight; 859 newNode->next = (ListNodeRef) NextTrie_GetPtr(next); 860 next = (uintptr_t) newNode; 861 NextTrie_SetKind(next, ListKind); 862 root->slots[*key] = next; 863 } else { 864 // ** Handle payload. 865 root->weight = weight; 866 root->payload = payload; 867 } 868} 869 870static TrieLevelRef burstCFBurstTrieLevel(CFBurstTrieRef trie, ListNodeRef list, uint32_t listCount) { 871 TrieLevelRef newLevel = (TrieLevelRef) calloc(1, sizeof(struct _TrieLevel)); 872 while (list) { 873 addCFBurstTrieBurstLevel(trie, newLevel, list->string, list->length, list->weight, list->payload); 874 ListNodeRef temp = list; 875 list = list->next; 876 free(temp); 877 } 878 return newLevel; 879} 880 881static CFBTInsertCode addCFBurstTrieListNode(CFBurstTrieRef trie, ListNodeRef list, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload, uint32_t *listCount) 882{ 883 CFBTInsertCode code = FailedInsert; 884 uint32_t count = 1; 885 886 ListNodeRef last = list; 887 while (list) { 888 if (list->length == keylen && memcmp(key, list->string, keylen) == 0) { 889 list->weight += weight; 890 list->payload = payload; 891 code = ExistingTerm; 892 break; 893 } else { 894 count++; 895 last = list; 896 list = list->next; 897 } 898 } 899 900 if (!list) { 901 last->next = makeCFBurstTrieListNode(key, keylen, weight, payload); 902 code = NewTerm; 903 } 904 905 *listCount = count; 906 return code; 907} 908 909static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) 910{ 911 CFBTInsertCode code = FailedInsert; 912 if (keylen) { 913 NextTrie next = root->slots[*key]; 914 if (NextTrie_GetKind(next) == TrieKind) { 915 TrieLevelRef nextLevel = (TrieLevelRef) NextTrie_GetPtr(next); 916 code = addCFBurstTrieLevel(trie, nextLevel, key+1, keylen-1, weight, payload); 917 } else { 918 if (NextTrie_GetKind(next) == ListKind) { 919 uint32_t listCount; 920 ListNodeRef listNode = (ListNodeRef) NextTrie_GetPtr(next); 921 code = addCFBurstTrieListNode(trie, listNode, key+1, keylen-1, weight, payload, &listCount); 922 if (listCount > trie->containerSize) { 923 next = (uintptr_t) burstCFBurstTrieLevel(trie, listNode, listCount); 924 NextTrie_SetKind(next, TrieKind); 925 } 926 } else { 927 // ** Make a new list node 928 next = (uintptr_t) makeCFBurstTrieListNode(key+1, keylen-1, weight, payload); 929 NextTrie_SetKind(next, ListKind); 930 code = NewTerm; 931 } 932 root->slots[*key] = next; 933 } 934 } else { 935 // ** Handle payload 936 if (!root->weight) code = NewTerm; 937 else code = ExistingTerm; 938 root->weight += weight; 939 root->payload = payload; 940 } 941 942 return code; 943} 944#if 0 945#pragma mark - 946#pragma mark Searching 947#endif 948static void findCFBurstTrieList(CFBurstTrieRef trie, TrieCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) 949{ 950 ListNodeRef list = (ListNodeRef)NextTrie_GetPtr(cursor->next); 951 int len = cursor->prefixlen-cursor->keylen; 952 len = len <= 0 ? 0 : len; 953 while (list) { 954 int lencompare = list->length-len; 955 if (list->length >= len && 956 (len == 0 || memcmp(list->string, cursor->prefix+cursor->keylen, len) == 0)) { 957 memcpy(cursor->key+cursor->keylen, list->string, list->length); 958 cursor->key[cursor->keylen+list->length] = 0; 959 cursor->next = (NextTrie)list; 960 if (list->payload && callback(ctx, cursor->key, list->payload, lencompare==0)) return; 961 } 962 list = list->next; 963 } 964} 965 966static void findCFBurstTrieMappedPage(CFBurstTrieRef trie, MapCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) 967{ 968 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 969 uint32_t end = page->length; 970 uint32_t cur = 0; 971 int len = cursor->prefixlen-cursor->keylen; 972 len = len <= 0 ? 0 : len; 973 if (trie->cflags & kCFBurstTriePrefixCompression) { 974 uint8_t pfx[CHARACTER_SET_SIZE]; 975 PageEntryPacked *lastEntry = 0; 976 while (cur < end) { 977 PageEntryPacked *entry = (PageEntryPacked *)&page->data[cur]; 978 int lencompare = (entry->strlen+entry->pfxLen)-len; 979 if (lastEntry && entry->pfxLen>lastEntry->pfxLen) memcpy(pfx+lastEntry->pfxLen, lastEntry->string, entry->pfxLen-lastEntry->pfxLen); 980 if (lencompare >= 0 && 981 (len == 0 || (__builtin_memcmp(pfx, cursor->prefix+cursor->keylen, entry->pfxLen) == 0 && 982 __builtin_memcmp(entry->string, cursor->prefix+cursor->keylen+entry->pfxLen, cursor->prefixlen-cursor->keylen-entry->pfxLen) == 0))) { 983 memcpy(cursor->key+cursor->keylen, pfx, entry->pfxLen); 984 memcpy(cursor->key+cursor->keylen+entry->pfxLen, entry->string, entry->strlen); 985 cursor->key[cursor->keylen+entry->pfxLen+entry->strlen] = 0; 986 if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return; 987 } 988 lastEntry = entry; 989 cur += sizeof(*entry) + entry->strlen; 990 } 991 } else { 992 while (cur < end) { 993 PageEntry *entry = (PageEntry *)&page->data[cur]; 994 int lencompare = entry->strlen-len; 995 if (lencompare >= 0 && __builtin_memcmp(entry->string, cursor->prefix+cursor->keylen, len) == 0) { 996 memcpy(cursor->key+cursor->keylen, entry->string, entry->strlen); 997 cursor->key[cursor->keylen+entry->strlen] = 0; 998 if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return; 999 } 1000 cur += sizeof(*entry) + entry->strlen; 1001 } 1002 } 1003} 1004 1005 1006static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) 1007{ 1008 if (cursor->keylen < cursor->prefixlen) { 1009 cursor->next = ((TrieLevelRef)NextTrie_GetPtr(cursor->next))->slots[cursor->prefix[cursor->keylen]]; 1010 cursor->key[cursor->keylen++] = cursor->prefix[cursor->keylen]; 1011 1012 if (NextTrie_GetKind(cursor->next) == TrieKind) { 1013 findCFBurstTrieLevel(trie, cursor, exactmatch, ctx, callback); 1014 } else if (NextTrie_GetKind(cursor->next) == ListKind) { 1015 findCFBurstTrieList(trie, cursor, ctx, callback); 1016 } 1017 } else { 1018 TrieLevelRef root = (TrieLevelRef)NextTrie_GetPtr(cursor->next); 1019 if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return; 1020 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1021 traverseCFBurstTrieLevel(trie, root, cursor, exactmatch, ctx, callback); 1022 } 1023} 1024 1025static void findCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) 1026{ 1027 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1028 if (cursor->keylen < cursor->prefixlen) { 1029 uint8_t mykey = *(cursor->prefix+cursor->keylen); 1030 cursor->key[cursor->keylen++] = *(cursor->prefix+cursor->keylen); 1031 1032 uint8_t slot = mykey / 64; 1033 uint8_t bit = mykey % 64; 1034 uint32_t item = 0; 1035 uint64_t bword = root->bitmap[slot]; 1036 1037 if (bword & (1ull << bit)) { 1038 // ** Count all the set bits up to this bit 1039 for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]); 1040 item += __builtin_popcountll(bword & ((1ull << bit)-1)); 1041 uint32_t offset = root->slots[item]; 1042 cursor->next = offset; 1043 1044 if (DiskNextTrie_GetKind(offset) == TrieKind) { 1045 findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback); 1046 } else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) { 1047 findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback); 1048 } else if (DiskNextTrie_GetKind(offset) == ListKind) { 1049 findCFBurstTrieMappedPage(trie, cursor, ctx, callback); 1050 } 1051 } 1052 } else { 1053 if(root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return; 1054 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1055 traverseCFBurstTrieCompactMappedLevel(trie, root, cursor, exactmatch, ctx, callback); 1056 } 1057} 1058 1059static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) 1060{ 1061 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1062 if (cursor->keylen < cursor->prefixlen) { 1063 uint8_t slot = *(cursor->prefix+cursor->keylen); 1064 cursor->next = root->slots[slot]; 1065 cursor->key[cursor->keylen++] = slot; 1066 1067 if (DiskNextTrie_GetKind(cursor->next) == TrieKind) { 1068 findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback); 1069 } else if (DiskNextTrie_GetKind(cursor->next) == CompactTrieKind) { 1070 findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback); 1071 } else if (DiskNextTrie_GetKind(cursor->next) == ListKind) { 1072 findCFBurstTrieMappedPage(trie, cursor, ctx, callback); 1073 } 1074 } else { 1075 if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return; 1076 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1077 traverseCFBurstTrieMappedLevel(trie, root, cursor, exactmatch, ctx, callback); 1078 } 1079} 1080 1081static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) 1082{ 1083 cursor->key[cursor->keylen] = 0; 1084 uint32_t len = cursor->keylen; 1085 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 1086 NextTrie next = root->slots[i]; 1087 cursor->keylen = len; 1088 cursor->key[cursor->keylen++] = i; 1089 1090 if (NextTrie_GetKind(next) == TrieKind) { 1091 TrieLevelRef level = (TrieLevelRef)NextTrie_GetPtr(next); 1092 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; 1093 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1094 traverseCFBurstTrieLevel(trie, level, cursor, exactmatch, ctx, callback); 1095 } else if (NextTrie_GetKind(next) == ListKind) { 1096 cursor->next = next; 1097 cursor->key[cursor->keylen] = 0; 1098 findCFBurstTrieList(trie, cursor, ctx, callback); 1099 } 1100 } 1101} 1102 1103static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) 1104{ 1105 cursor->key[cursor->keylen] = 0; 1106 uint32_t len = cursor->keylen; 1107 1108 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 1109 uint32_t offset = (uint32_t)root->slots[i]; 1110 cursor->keylen = len; 1111 cursor->key[cursor->keylen++] = i; 1112 1113 if (DiskNextTrie_GetKind(offset) == TrieKind) { 1114 MapTrieLevelRef level = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset); 1115 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; 1116 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1117 traverseCFBurstTrieMappedLevel(trie, level, cursor, exactmatch, ctx, callback); 1118 } else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) { 1119 CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset); 1120 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; 1121 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1122 traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback); 1123 } else if (DiskNextTrie_GetKind(offset) == ListKind) { 1124 cursor->next = offset; 1125 cursor->key[cursor->keylen] = 0; 1126 findCFBurstTrieMappedPage(trie, cursor, ctx, callback); 1127 } 1128 } 1129} 1130 1131static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) 1132{ 1133 cursor->key[cursor->keylen] = 0; 1134 uint32_t len = cursor->keylen; 1135 for (uint32_t c=0; c < CHARACTER_SET_SIZE; c++) { 1136 //** This could be optimized to remember what the last slot/item was and not count bits over and over. 1137 uint8_t mykey = c; 1138 uint32_t slot = mykey / 64; 1139 uint32_t bit = mykey % 64; 1140 uint32_t item = 0; 1141 uint64_t bword = root->bitmap[slot]; 1142 cursor->keylen = len; 1143 1144 if (bword & (1ull << bit)) { 1145 // ** Count all the set bits up to this bit 1146 for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]); 1147 item += __builtin_popcountll(bword & ((1ull << bit)-1)); 1148 uint32_t offset = root->slots[item]; 1149 cursor->key[cursor->keylen++] = mykey; 1150 1151 if(DiskNextTrie_GetKind(offset) == CompactTrieKind) { 1152 CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset); 1153 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; 1154 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1155 traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback); 1156 } else if(DiskNextTrie_GetKind(offset) == TrieKind) { 1157 traverseCFBurstTrieMappedLevel(trie, (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset), cursor, exactmatch, ctx, callback); 1158 } else if (DiskNextTrie_GetKind(offset) == ListKind) { 1159 cursor->next = offset; 1160 cursor->key[cursor->keylen] = 0; 1161 findCFBurstTrieMappedPage(trie, cursor, ctx, callback); 1162 } 1163 } 1164 } 1165} 1166 1167static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) { 1168 if (trie->mapBase) { 1169 if (trie->cflags & kCFBurstTriePrefixCompression) { 1170 fprintf(stderr, "Please use CFBurstTrieCursorRef API for file based trie.\n"); 1171 return; 1172 } else { 1173 TrieHeader *header = (TrieHeader *)trie->mapBase; 1174 MapCursor csr; 1175 csr.next = header->rootOffset; 1176 csr.prefix = prefix; 1177 csr.prefixlen = prefixLen; 1178 csr.key[0] = 0; 1179 csr.keylen = 0; 1180 findCFBurstTrieMappedLevel(trie, &csr, exactmatch, ctx, callback); 1181 } 1182 } else { 1183 TrieCursor csr; 1184 csr.next = ((unsigned long)&trie->root)|TrieKind; 1185 csr.prefix = prefix; 1186 csr.prefixlen = prefixLen; 1187 csr.key[0] = 0; 1188 csr.keylen = 0; 1189 findCFBurstTrieLevel(trie, &csr, exactmatch, ctx, callback); 1190 } 1191} 1192 1193CF_INLINE uint32_t getPackedPageEntrySize(PageEntryPacked *entry) 1194{ 1195 return sizeof(PageEntryPacked) + entry->strlen; 1196} 1197 1198CF_INLINE uint32_t getPageEntrySize(PageEntry *entry) 1199{ 1200 return sizeof(PageEntry) + entry->strlen; 1201} 1202 1203/* 1204static void _printPageEntry(PageEntryPacked *entry) { 1205 printf("entry 0x%p:\n", entry); 1206 printf("pfxLen = %d, strLen = %d\n", entry->pfxLen, entry->strlen); 1207 if (entry->strlen > 0) { 1208 printf("string = "); 1209 for (int i = 0; i < entry->strlen; ++i) 1210 printf("%d ", entry->string[i]); 1211 printf("\n"); 1212 } 1213 printf("\n"); 1214} 1215*/ 1216static Boolean advanceCursorOnMappedPageForByte(Page *page, CompactMapCursor *cursor, UInt8 byte) { 1217 PageEntryPacked *entry; 1218 Boolean found = FALSE; 1219 uint32_t minPrefixLength = 0; 1220 1221 if (cursor->isOnPage) { 1222 entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage]; 1223 //_printPageEntry(entry); 1224 BOOL shouldContinue = TRUE; 1225 1226 if (!(cursor->entryOffsetInPage == 0 && entry->strlen == 0)) { 1227 if (cursor->entryOffsetInPage == entry->strlen - 1) { 1228 minPrefixLength = entry->pfxLen + entry->strlen; 1229 cursor->entryOffsetInPage += getPackedPageEntrySize(entry); 1230 } else { 1231 cursor->offsetInEntry++; 1232 if (entry->string[cursor->offsetInEntry] == byte) 1233 found = TRUE; 1234 else if (entry->string[cursor->offsetInEntry] > byte) 1235 shouldContinue = FALSE; 1236 else { 1237 minPrefixLength = entry->pfxLen + cursor->offsetInEntry; 1238 cursor->entryOffsetInPage += getPackedPageEntrySize(entry); 1239 } 1240 } 1241 } 1242 1243 if (found) { 1244 cursor->isOnPage = TRUE; 1245 return TRUE; 1246 } else if (!shouldContinue) 1247 return FALSE; 1248 } else { 1249 cursor->entryOffsetInPage = 0; 1250 } 1251 1252 uint32_t pageSize = page->length - sizeof(Page); 1253 while (cursor->entryOffsetInPage < pageSize) { 1254 entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage]; 1255 //_printPageEntry(entry); 1256 if (minPrefixLength > entry->pfxLen) 1257 break; 1258 else if (minPrefixLength < entry->pfxLen) 1259 cursor->entryOffsetInPage += getPackedPageEntrySize(entry); 1260 else { 1261 if (entry->strlen == 0) 1262 cursor->entryOffsetInPage += getPackedPageEntrySize(entry); 1263 else { 1264 if (entry->string[0] > byte) 1265 // Entries are sorted alphabetically 1266 break; 1267 else if (entry->string[0] < byte) 1268 cursor->entryOffsetInPage += getPackedPageEntrySize(entry); 1269 else { 1270 cursor->offsetInEntry = 0; 1271 found = TRUE; 1272 break; 1273 } 1274 } 1275 } 1276 } 1277 1278 if (found) 1279 cursor->isOnPage = TRUE; 1280 1281 return found; 1282} 1283 1284static Boolean advanceCursorMappedPageWithPerfixCompression(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1285{ 1286 if (length == 0) { 1287 PageEntryPacked *entry = (PageEntryPacked*)&page->data[0]; 1288 if (!cursor->isOnPage) { 1289 cursor->entryOffsetInPage = 0; 1290 cursor->offsetInEntry = 0; 1291 cursor->isOnPage = entry->pfxLen == 0 && entry->strlen == 0; 1292 } 1293 getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload); 1294 return TRUE; 1295 } 1296 1297 for (CFIndex i = 0; i < length; ++i) { 1298 if (!advanceCursorOnMappedPageForByte(page, cursor, bytes[i])) 1299 return FALSE; 1300 } 1301 PageEntryPacked *entry = (PageEntryPacked*)&page->data[cursor->entryOffsetInPage]; 1302 getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload); 1303 return TRUE; 1304} 1305 1306static Boolean advanceCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1307{ 1308 if (length == 0) { 1309 PageEntry*entry = (PageEntry*)&page->data[0]; 1310 if (!cursor->isOnPage) { 1311 cursor->entryOffsetInPage = 0; 1312 cursor->offsetInEntry = 0; 1313 cursor->isOnPage = entry->strlen == 0; 1314 } 1315 getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload); 1316 return TRUE; 1317 } 1318 1319 PageEntry *entry; 1320 uint32_t pageSize = page->length - sizeof(Page); 1321 const UInt8 * prefix = NULL; 1322 uint32_t prefixLength = 0; 1323 1324 if (cursor->isOnPage) { 1325 entry = (PageEntry*)&page->data[cursor->entryOffsetInPage]; 1326 prefix = entry->string; 1327 prefixLength = cursor->offsetInEntry + 1; 1328 } 1329 1330 while (cursor->entryOffsetInPage < pageSize) { 1331 PageEntry *entry = (PageEntry*)&page->data[cursor->entryOffsetInPage]; 1332 if (entry->strlen == 0) { 1333 cursor->entryOffsetInPage += getPageEntrySize(entry); 1334 continue; 1335 } 1336 1337 if (entry->strlen <= prefixLength) 1338 return FALSE; 1339 else { 1340 int cmpResult = 0; 1341 if (prefixLength > 0) 1342 cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength); 1343 if (cmpResult > 0) 1344 return FALSE; 1345 else if (cmpResult == 0) { 1346 if (entry->strlen < prefixLength + length) { 1347 int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, entry->strlen - prefixLength); 1348 if (cmpResult2 > 0) 1349 return FALSE; 1350 else 1351 cursor->entryOffsetInPage += getPageEntrySize(entry); 1352 } else { 1353 int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, length); 1354 if (cmpResult2 > 0) 1355 return FALSE; 1356 else if (cmpResult2 == 0) { 1357 cursor->isOnPage = TRUE; 1358 cursor->offsetInEntry = prefixLength + length - 1; 1359 getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload); 1360 return TRUE; 1361 } else 1362 cursor->entryOffsetInPage += getPageEntrySize(entry); 1363 } 1364 } else 1365 cursor->entryOffsetInPage += getPageEntrySize(entry); 1366 } 1367 } 1368 return FALSE; 1369} 1370 1371static Boolean advanceCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1372{ 1373 if (!bytes || length < 0) 1374 return FALSE; 1375 1376 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1377 uint32_t pageSize = page->length - sizeof(Page); 1378 if (pageSize == 0) 1379 return FALSE; 1380 1381 if (trie->cflags & kCFBurstTrieSortByKey) 1382 return advanceCursorMappedPageSortedByKey(page, cursor, bytes, length); 1383 else if (trie->cflags & kCFBurstTriePrefixCompression) 1384 return advanceCursorMappedPageWithPerfixCompression(page, cursor, bytes, length); 1385 else 1386 return FALSE; 1387} 1388 1389static Boolean advanceCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1390{ 1391 if (!bytes || length < 0) 1392 return FALSE; 1393 1394 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1395 if (length == 0) { 1396 cursor->payload = root->payload; 1397 return TRUE; 1398 } 1399 1400 uint8_t slot = bytes[0] / 64; 1401 uint8_t bit = bytes[0] % 64; 1402 uint32_t item = 0; 1403 uint64_t bword = root->bitmap[slot]; 1404 if (bword & (1ull << bit)) { 1405 // Count all the set bits up to this bit 1406 for (int i = 0; i < slot; ++i) 1407 item += __builtin_popcountll(root->bitmap[i]); 1408 item += __builtin_popcountll(bword & ((1ull << bit)-1)); 1409 cursor->next = root->slots[item]; 1410 return advanceMapCursor(trie, cursor, bytes + 1, length - 1); 1411 } else { 1412 return FALSE; 1413 } 1414} 1415 1416static Boolean advanceCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1417{ 1418 if (!bytes || length < 0) 1419 return FALSE; 1420 1421 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1422 if (length == 0) { 1423 cursor->payload = root->payload; 1424 return TRUE; 1425 } 1426 1427 cursor->next = root->slots[bytes[0]]; 1428 return advanceMapCursor(trie, cursor, bytes + 1, length - 1); 1429} 1430 1431static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1432{ 1433 bool result = FALSE; 1434 switch (DiskNextTrie_GetKind(cursor->next)) { 1435 case TrieKind: 1436 result = advanceCursorMappedLevel(trie, cursor, bytes, length); 1437 break; 1438 case CompactTrieKind: 1439 result = advanceCursorCompactMappedLevel(trie, cursor, bytes, length); 1440 break; 1441 case ListKind: 1442 result = advanceCursorMappedPage(trie, cursor, bytes, length); 1443 break; 1444 case Nothing: { 1445 TrieHeader *header = (TrieHeader*)trie->mapBase; 1446 if (cursor->next == header->rootOffset) 1447 result = advanceCursorMappedLevel(trie, cursor, bytes, length); 1448 } 1449 } 1450 1451 return result; 1452} 1453 1454static void traverseFromMapCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1455{ 1456 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1457 if (root->payload) { 1458 callback(ctx, bytes, length, root->payload, stop); 1459 if (*stop) 1460 return; 1461 } 1462 1463 if (length >= capacity) 1464 return; 1465 1466 for (int i = 0; i < CHARACTER_SET_SIZE; ++i) {i = 1467 bytes[length] = i; 1468 cursor->next = (uint32_t)root->slots[i];; 1469 cursor->isOnPage = FALSE; 1470 cursor->entryOffsetInPage = 0; 1471 cursor->offsetInEntry = 0; 1472 cursor->payload = 0; 1473 traverseFromMapCursor(trie, cursor, bytes, capacity - 1, length + 1, stop, ctx, callback); 1474 if (*stop) 1475 return; 1476 } 1477} 1478 1479static void traverseFromMapCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1480{ 1481 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1482 if (root->payload) { 1483 callback(ctx, bytes, length, root->payload, stop); 1484 if (*stop) 1485 return; 1486 } 1487 1488 if (length >= capacity) 1489 return; 1490 for (int c = 0; c < CHARACTER_SET_SIZE; ++c) { 1491 bytes[length] = c; 1492 //This could be optimized to remember what the last slot/item was and not count bits over and over. 1493 uint8_t slot = c / 64; 1494 uint8_t bit = c % 64; 1495 uint32_t item = 0; 1496 uint64_t bword = root->bitmap[slot]; 1497 if (bword & (1ull << bit)) { 1498 // Count all the set bits up to this bit 1499 for (int i = 0; i < slot; ++i) 1500 item += __builtin_popcountll(root->bitmap[i]); 1501 item += __builtin_popcountll(bword & ((1ull << bit)-1)); 1502 cursor->next = root->slots[item]; 1503 cursor->isOnPage = FALSE; 1504 cursor->entryOffsetInPage = 0; 1505 cursor->offsetInEntry = 0; 1506 cursor->payload = 0; 1507 traverseFromMapCursor(trie, cursor, bytes, capacity - 1, length + 1, stop, ctx, callback); 1508 if (*stop) 1509 return; 1510 } 1511 } 1512} 1513 1514static void traverseFromMapCursorMappedPageWithPrefixCompression(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1515{ 1516 uint32_t pageSize = page->length - sizeof(Page); 1517 uint32_t offset = cursor->entryOffsetInPage; 1518 uint32_t minPrefixLength = 0; 1519 if (cursor->isOnPage) { 1520 PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset]; 1521 int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1; 1522 if (remainingLength >= 0 && remainingLength <= capacity) { 1523 memcpy(bytes + length, entry->string + cursor->offsetInEntry + 1, remainingLength); 1524 callback(ctx, bytes, length + remainingLength, entry->payload, stop); 1525 if (*stop) 1526 return; 1527 } 1528 minPrefixLength = entry->pfxLen + cursor->offsetInEntry; 1529 offset += getPackedPageEntrySize(entry); 1530 } 1531 PageEntryPacked *previousEntry = NULL; 1532 while (offset < pageSize) { 1533 PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset]; 1534 if (minPrefixLength > entry->pfxLen) 1535 break; 1536 else if (entry->payload && entry->strlen <= capacity) { 1537 if (previousEntry) 1538 length -= previousEntry->strlen + previousEntry->pfxLen - entry->pfxLen; 1539 memcpy(bytes + length, entry->string, entry->strlen); 1540 callback(ctx, bytes, length + entry->strlen, entry->payload, stop); 1541 length += entry->strlen; 1542 if (*stop) 1543 return; 1544 } 1545 previousEntry = entry; 1546 offset += getPackedPageEntrySize(entry); 1547 } 1548} 1549 1550static void traverseFromMapCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1551{ 1552 uint32_t pageSize = page->length - sizeof(Page); 1553 uint32_t offset = cursor->entryOffsetInPage; 1554 uint32_t prefixLength = 0; 1555 const UInt8 *prefix = NULL; 1556 if (cursor->isOnPage) { 1557 PageEntry *entry = (PageEntry*)&page->data[offset]; 1558 int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1; 1559 if (remainingLength >= 0 && remainingLength <= capacity) { 1560 memcpy(bytes + length, entry->string + cursor->offsetInEntry, remainingLength); 1561 callback(ctx, bytes, length + remainingLength, entry->payload, stop); 1562 if (*stop) 1563 return; 1564 } 1565 prefixLength = cursor->offsetInEntry + 1; 1566 prefix = entry->string; 1567 offset += getPageEntrySize(entry); 1568 } 1569 1570 while (offset < pageSize) { 1571 PageEntry *entry = (PageEntry*)&page->data[offset]; 1572 if (entry->strlen < prefixLength) 1573 break; 1574 else { 1575 int cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength); 1576 if (cmpResult > 0) 1577 break; 1578 else if (entry->payload && entry->strlen <= capacity) { 1579 if (entry->strlen > 0) 1580 memcpy(bytes + length, entry->string + prefixLength, entry->strlen - prefixLength); 1581 callback(ctx, bytes, length + entry->strlen - prefixLength, entry->payload, stop); 1582 if (*stop) 1583 return; 1584 } 1585 offset += getPageEntrySize(entry); 1586 } 1587 } 1588} 1589 1590static void traverseFromMapCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1591{ 1592 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1593 if (trie->cflags & kCFBurstTrieSortByKey) 1594 traverseFromMapCursorMappedPageSortedByKey(page, cursor, bytes, capacity, length, stop, ctx, callback); 1595 else if (trie->cflags & kCFBurstTriePrefixCompression) 1596 traverseFromMapCursorMappedPageWithPrefixCompression(page, cursor, bytes, capacity, length, stop, ctx, callback); 1597} 1598 1599void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1600{ 1601 switch (DiskNextTrie_GetKind(cursor->next)) { 1602 case TrieKind: 1603 traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback); 1604 break; 1605 case CompactTrieKind: 1606 traverseFromMapCursorCompactMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback); 1607 break; 1608 case ListKind: 1609 traverseFromMapCursorMappedPage(trie, cursor, bytes, capacity, length, stop, ctx, callback); 1610 break; 1611 case Nothing: { 1612 TrieHeader *header = (TrieHeader*)trie->mapBase; 1613 if (cursor->next == header->rootOffset) { 1614 traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback); 1615 } 1616 break; 1617 } 1618 } 1619} 1620 1621void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination) 1622{ 1623 destination->next = source->next; 1624 destination->entryOffsetInPage = source->entryOffsetInPage; 1625 destination->offsetInEntry = source->offsetInEntry; 1626 destination->isOnPage = source->isOnPage; 1627 destination->payload = source->payload; 1628} 1629 1630Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs) 1631{ 1632 return lhs->entryOffsetInPage == rhs->entryOffsetInPage && lhs->isOnPage == rhs->isOnPage && lhs->next == rhs->next && lhs->offsetInEntry == rhs->offsetInEntry; 1633} 1634 1635static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload) 1636{ 1637 if (payload) 1638 *payload = 0; 1639 if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) { 1640 if (payload) 1641 *payload = entry->payload; 1642 return TRUE; 1643 } else if (cursor->offsetInEntry != entry->strlen - 1) 1644 return FALSE; 1645 else { 1646 if (payload) 1647 *payload = entry->payload; 1648 return TRUE; 1649 } 1650} 1651 1652static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload) 1653{ 1654 if (payload) 1655 *payload = 0; 1656 if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) { 1657 if (payload) 1658 *payload = entry->payload; 1659 return TRUE; 1660 } else if (cursor->offsetInEntry != entry->strlen - 1) 1661 return FALSE; 1662 else { 1663 if (payload) 1664 *payload = entry->payload; 1665 return TRUE; 1666 } 1667} 1668 1669Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload) 1670{ 1671 if (!cursor) 1672 return FALSE; 1673 if (cursor->payload) { 1674 if (payload) 1675 *payload = cursor->payload; 1676 return TRUE; 1677 } 1678 return FALSE; 1679} 1680 1681// Legacy 1682 1683static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) { 1684 Boolean success = false; 1685 if (length) { 1686 uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[*key]); 1687 if(DiskNextTrie_GetKind(offset) == TrieKind) { 1688 return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map,offset), map, key+1, length-1, payload, prefix); 1689 } else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) { 1690 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix); 1691 } else { 1692 if(DiskNextTrie_GetKind(offset) == ListKind) { 1693 return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix); 1694 } else { 1695 return success; 1696 } 1697 } 1698 } else { 1699 if (trie->weight) { 1700 SetPayload(payload, CFSwapInt32LittleToHost(trie->payload)); 1701 success = true; 1702 } 1703 } 1704 return success; 1705} 1706 1707static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) { 1708 Boolean success = false; 1709 if (length) { 1710 uint32_t mykey = *key; 1711 uint32_t slot = mykey / 64; 1712 uint32_t bit = mykey % 64; 1713 uint32_t item = 0; 1714 uint64_t bword = CFSwapInt64LittleToHost(trie->bitmap[slot]); 1715 if (bword & (1ull << bit)) { 1716 /* Count all the set bits up to this bit */ 1717 for (int i=0; i < slot; i++) { 1718 item += __builtin_popcountll(trie->bitmap[i]); 1719 } 1720 item += __builtin_popcountll(bword & ((1ull << bit)-1)); 1721 uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[item]); 1722 if(DiskNextTrie_GetKind(offset) == TrieKind) { 1723 return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix); 1724 } else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) { 1725 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix); 1726 } 1727 else { 1728 if(DiskNextTrie_GetKind(offset) == ListKind) { 1729 return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix); 1730 } else { 1731 return success; 1732 } 1733 } 1734 } 1735 } else { 1736 if (trie->weight) { 1737 SetPayload(payload, CFSwapInt32LittleToHost(trie->payload)); 1738 success = true; 1739 } 1740 } 1741 return success; 1742} 1743 1744static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) { 1745 Boolean success = false; 1746 uint32_t end = CFSwapInt32LittleToHost(page->length); 1747 uint32_t cur = 0; 1748 if (prefix) { 1749 uint8_t pfx[256]; 1750 while (cur < end) { 1751 StringPageEntryPacked *entry = (StringPageEntryPacked *)&page->data[cur]; 1752 uint16_t strlen = entry->pfxLen+CFSwapInt16LittleToHost(entry->strlen); 1753 if (strlen == length && __builtin_memcmp(pfx, key, entry->pfxLen) == 0 && __builtin_memcmp(entry->string, key+entry->pfxLen, length-entry->pfxLen) == 0) { 1754 SetPayload(payload, CFSwapInt32LittleToHost(entry->payload)); 1755 success = true; 1756 return success; 1757 } else { 1758 memcpy(pfx+entry->pfxLen, entry->string, MIN(255-entry->pfxLen, length-entry->pfxLen)); 1759 cur += sizeof(*entry) + strlen - entry->pfxLen; 1760 } 1761 } 1762 } else { 1763 while (cur < end) { 1764 StringPageEntry *entry = (StringPageEntry *)&page->data[cur]; 1765 uint16_t strlen = CFSwapInt16LittleToHost(entry->strlen); 1766 if (strlen == length && __builtin_memcmp(entry->string, key, length) == 0) { 1767 SetPayload(payload, CFSwapInt32LittleToHost(entry->payload)); 1768 success = true; 1769 return success; 1770 } else { 1771 cur += sizeof(*entry) + strlen; 1772 } 1773 } 1774 1775 } 1776 return success; 1777} 1778 1779 1780#if 0 1781#pragma mark - 1782#pragma mark Serialization 1783#endif 1784 1785static bool serializeCFBurstTrieLevels(CFBurstTrieRef trie, TrieLevelRef root, uint32_t *offset, off_t start_offset, bool dispose, bool isroot, int fd) 1786{ 1787 bool dense = true; 1788 int count = 0; 1789 1790 for (int i=0; i < CHARACTER_SET_SIZE; i++) if (root->slots[i]) count++; 1791 1792 uint32_t this_offset = *offset; 1793 1794 if ((trie->cflags & kCFBurstTrieBitmapCompression) && count < MAX_BITMAP_SIZE && !isroot) { 1795 size_t size = sizeof(CompactMapTrieLevel) + sizeof(uint32_t) * count; 1796 int offsetSlot = 0; 1797 1798 CompactMapTrieLevel *maptrie = (CompactMapTrieLevel *)alloca(size); 1799 bzero(maptrie, size); 1800 *offset += size; 1801 1802 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 1803 NextTrie next = root->slots[i]; 1804 if (next) { 1805 uint32_t slot = i / 64; 1806 uint32_t bit = i % 64; 1807 maptrie->bitmap[slot] |= 1ull<<bit; 1808 if (NextTrie_GetKind(next) == TrieKind) { 1809 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next); 1810 uint32_t childOffset = *offset; 1811 if (serializeCFBurstTrieLevels(trie, nextLevel, offset, start_offset, true, false, fd)) { 1812 maptrie->slots[offsetSlot] = (TrieKind|childOffset); 1813 } else { 1814 maptrie->slots[offsetSlot] = (CompactTrieKind|childOffset); 1815 } 1816 } else { 1817 maptrie->slots[offsetSlot] = next; 1818 } 1819 offsetSlot++; 1820 } 1821 } 1822 maptrie->payload = root->payload; 1823 1824 int bitcount = 0; 1825 for (int i=0; i < 4; i++) bitcount += __builtin_popcountll(maptrie->bitmap[i]); 1826 assert(bitcount == count); 1827 1828 pwrite(fd, maptrie, size, this_offset+start_offset); 1829 dense = false; 1830 } else { 1831 MapTrieLevel maptrie; 1832 *offset += sizeof(maptrie); 1833 1834 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 1835 NextTrie next = root->slots[i]; 1836 if (NextTrie_GetKind(next) == TrieKind) { 1837 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next); 1838 uint32_t childOffset = *offset; 1839 if (serializeCFBurstTrieLevels(trie, nextLevel, offset, start_offset, true, false, fd)) { 1840 maptrie.slots[i] = (TrieKind|childOffset); 1841 } else { 1842 maptrie.slots[i] = (CompactTrieKind|childOffset); 1843 } 1844 } else { 1845 maptrie.slots[i] = next; 1846 } 1847 } 1848 maptrie.payload = root->payload; 1849 pwrite(fd, &maptrie, sizeof(maptrie), this_offset+start_offset); 1850 } 1851 1852 if (dispose) free(root); 1853 return dense; 1854} 1855 1856static void serializeCFBurstTrieList(CFBurstTrieRef trie, ListNodeRef listNode, int fd) 1857{ 1858 uint32_t listCount; 1859 size_t size = trie->containerSize; 1860 1861 // ** Temp list of nodes to sort 1862 ListNodeRef *nodes = (ListNodeRef *)malloc(sizeof(ListNodeRef) * size); 1863 for (listCount = 0; listNode; listCount++) { 1864 if (listCount >= size) { 1865 size *= 2; 1866 nodes = (ListNodeRef *)realloc(nodes, sizeof(ListNodeRef) * size); 1867 } 1868 nodes[listCount] = listNode; 1869 listNode = listNode->next; 1870 } 1871 1872 char _buffer[MAX_BUFFER_SIZE]; 1873 size_t bufferSize = (sizeof(Page) + size * (sizeof(PageEntryPacked) + MAX_STRING_SIZE)); 1874 char *buffer = bufferSize < MAX_BUFFER_SIZE ? _buffer : (char *) malloc(bufferSize); 1875 1876 Page *page = (Page *)buffer; 1877 uint32_t current = 0; 1878 1879 if (trie->cflags & kCFBurstTriePrefixCompression) { 1880 qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare); 1881 1882 ListNodeRef last = 0; 1883 for (int i=0; i < listCount; i++) { 1884 listNode = nodes[i]; 1885 uint8_t pfxLen = 0; 1886 if (last) { 1887 for ( ; 1888 pfxLen < CHARACTER_SET_SIZE-1 && 1889 pfxLen < listNode->length && 1890 pfxLen < last->length && 1891 listNode->string[pfxLen] == last->string[pfxLen]; 1892 pfxLen++); 1893 } 1894 1895 PageEntryPacked *entry = (PageEntryPacked *)(&page->data[current]); 1896 entry->strlen = listNode->length - pfxLen; 1897 entry->payload = listNode->payload; 1898 entry->pfxLen = pfxLen; 1899 memcpy(entry->string, listNode->string+pfxLen, listNode->length-pfxLen); 1900 current += listNode->length - pfxLen + sizeof(PageEntryPacked); 1901 last = listNode; 1902 } 1903 } else { 1904 if (trie->cflags & kCFBurstTrieSortByKey) 1905 qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare); 1906 else 1907 qsort(nodes, listCount, sizeof(ListNodeRef), nodeWeightCompare); 1908 1909 for (int i=0; i < listCount; i++) { 1910 listNode = nodes[i]; 1911 PageEntry *entry = (PageEntry *)(&page->data[current]); 1912 entry->strlen = listNode->length; 1913 entry->payload = listNode->payload; 1914 memcpy(entry->string, listNode->string, listNode->length); 1915 current += listNode->length + sizeof(PageEntry); 1916 } 1917 } 1918 1919 size_t len = (sizeof(Page) + current + 3) & ~3; 1920 page->length = current; 1921 write(fd, page, len); 1922 1923 free(nodes); 1924 if (buffer != _buffer) free(buffer); 1925} 1926 1927static void serializeCFBurstTrieLists(CFBurstTrieRef trie, TrieLevelRef root, off_t start_offset, int fd) 1928{ 1929 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 1930 NextTrie next = root->slots[i]; 1931 uint32_t offset; 1932 if (NextTrie_GetKind(next) == TrieKind) { 1933 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next); 1934 serializeCFBurstTrieLists(trie, nextLevel, start_offset, fd); 1935 } else { 1936 if (NextTrie_GetKind(next) == ListKind) { 1937 ListNodeRef listNode = (ListNodeRef)NextTrie_GetPtr(next); 1938 offset = lseek(fd, 0, SEEK_CUR) - start_offset; 1939 serializeCFBurstTrieList(trie, listNode, fd); 1940 finalizeCFBurstTrieList(listNode); 1941 //assert((offset & 3)==0); 1942 root->slots[i] = (offset|ListKind); 1943 } 1944 } 1945 } 1946} 1947 1948static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd) 1949{ 1950 TrieHeader header; 1951 header.signature = 0x0ddba11; 1952 header.rootOffset = 0; 1953 header.count = trie->count; 1954 header.size = 0; 1955 header.flags = trie->cflags; 1956 header.reserved[0] = 0; 1957 1958 uint32_t offset; 1959 lseek(fd, start_offset, SEEK_SET); 1960 1961 size_t header_size = sizeof(header); 1962 write(fd, &header, header_size); 1963 1964 serializeCFBurstTrieLists(trie, &trie->root, start_offset, fd); 1965 1966 offset = lseek(fd, 0, SEEK_CUR) - start_offset; 1967 size_t off = offsetof(TrieHeader, rootOffset); 1968 size_t offsize = sizeof(offset); 1969 pwrite(fd, &offset, offsize, off+start_offset); 1970 1971 serializeCFBurstTrieLevels(trie, &trie->root, &offset, start_offset, false, true, fd); 1972 1973 size_t off2 = offsetof(TrieHeader, size); 1974 offsize = sizeof(offset); 1975 pwrite(fd, &offset, offsize, off2+start_offset); 1976 1977 offset = lseek(fd, 0, SEEK_END); 1978 return (size_t)(offset-start_offset); 1979} 1980 1981#if 0 1982#pragma mark - 1983#pragma mark Release 1984#endif 1985 1986static void destroyCFBurstTrie(CFBurstTrieRef trie) { 1987 if (trie->mapBase) { 1988#if DEPLOYMENT_TARGET_WINDOWS 1989 UnmapViewOfFile(trie->mapBase); 1990 CloseHandle(trie->mapHandle); 1991 CloseHandle(trie->mappedFileHandle); 1992#else 1993 munmap(trie->mapBase, trie->mapSize); 1994#endif 1995 } else { 1996 finalizeCFBurstTrie(&trie->root); 1997 } 1998 free(trie); 1999 return; 2000} 2001 2002static void finalizeCFBurstTrie(TrieLevelRef trie) { 2003 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 2004 if (NextTrie_GetKind(trie->slots[i]) == TrieKind) { 2005 finalizeCFBurstTrie((TrieLevelRef)NextTrie_GetPtr(trie->slots[i])); 2006 free((void *)NextTrie_GetPtr(trie->slots[i])); 2007 } else if (NextTrie_GetKind(trie->slots[i]) == ListKind) { 2008 finalizeCFBurstTrieList((ListNodeRef)NextTrie_GetPtr(trie->slots[i])); 2009 } 2010 } 2011} 2012 2013static void finalizeCFBurstTrieList(ListNodeRef node) { 2014 do { 2015 ListNodeRef next = node->next; 2016 free(node); 2017 node = next; 2018 } while(node); 2019} 2020 2021#if 0 2022#pragma mark - 2023#pragma mark Helpers 2024#endif 2025 2026static int nodeWeightCompare(const void *a, const void *b) { 2027 ListNodeRef nodeA = *(ListNodeRef *)a; 2028 ListNodeRef nodeB = *(ListNodeRef *)b; 2029 return (nodeB->weight - nodeA->weight); 2030} 2031 2032static int nodeStringCompare(const void *a, const void *b) { 2033 ListNodeRef nodeA = *(ListNodeRef *)a; 2034 ListNodeRef nodeB = *(ListNodeRef *)b; 2035 int result = memcmp((char *)nodeA->string, (char *)nodeB->string, MIN(nodeA->length, nodeB->length)); 2036 if (result == 0) result = nodeA->length-nodeB->length; 2037 return result; 2038} 2039 2040static bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact) 2041{ 2042 uint32_t *ctx = (uint32_t *)context; 2043 if (exact) *ctx = payload; 2044 return exact; 2045} 2046 2047static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer) { 2048 uint32_t i, j; 2049 for (i = j = 0; i < numChars; i++) { 2050 UniChar c = chars[i]; 2051 if (CFStringIsSurrogateHighCharacter(c) && i + 1 < numChars && CFStringIsSurrogateLowCharacter(chars[i + 1])) { 2052 UTF32Char lc = CFStringGetLongCharacterForSurrogatePair(c, chars[i + 1]); 2053 buffer[j++] = 0xf0 + (lc >> 18); 2054 buffer[j++] = 0x80 + ((lc & 0x3ffff) >> 12); 2055 buffer[j++] = 0x80 + ((lc & 0xfff) >> 6); 2056 buffer[j++] = 0x80 + (lc & 0x3f); 2057 i++; 2058 } else { 2059 if (c < 0x80) { 2060 buffer[j++] = c; 2061 } else if (c < 0x800) { 2062 buffer[j++] = 0xc0 + (c >> 6); 2063 buffer[j++] = 0x80 + (c & 0x3f); 2064 } else { 2065 buffer[j++] = 0xe0 + (c >> 12); 2066 buffer[j++] = 0x80 + ((c & 0xfff) >> 6); 2067 buffer[j++] = 0x80 + (c & 0x3f); 2068 } 2069 } 2070 } 2071 buffer[j] = 0; 2072 return j; 2073} 2074 2075