# GDBM_File.pm -- Perl 5 interface to GNU gdbm library.

=head1 NAME

GDBM_File - Perl5 access to the gdbm library.

=head1 SYNOPSIS

    use GDBM_File;
    [$db =] tie %hash, 'GDBM_File', $filename, GDBM_WRCREAT, 0640
                or die "$GDBM_File::gdbm_errno";
    # Use the %hash...

    $e = $db->errno;
    $e = $db->syserrno;
    $str = $db->strerror;
    $bool = $db->needs_recovery;

    $db->clear_error;

    $db->reorganize;
    $db->sync;

    $n = $db->count;

    $n = $db->flags;

    $str = $db->dbname;

    $db->cache_size;
    $db->cache_size($newsize);

    $n = $db->block_size;

    $bool = $db->sync_mode;
    $db->sync_mode($bool);

    $bool = $db->centfree;
    $db->centfree($bool);

    $bool = $db->coalesce;
    $db->coalesce($bool);

    $bool = $db->mmap;

    $size = $db->mmapsize;
    $db->mmapsize($newsize);

    $db->recover(%args);

    untie %hash ;

=head1 DESCRIPTION

B<GDBM_File> is a module which allows Perl programs to make use of the
facilities provided by the GNU gdbm library. If you intend to use this
module you should really have a copy of the B<GDBM manual> at hand.
The manual is available online at
L<https://www.gnu.org.ua/software/gdbm/manual>.

Most of the B<gdbm> functions are available through the B<GDBM_File>
interface.

Unlike Perl's built-in hashes, it is not safe to C<delete> the current
item from a GDBM_File tied hash while iterating over it with C<each>.
This is a limitation of the gdbm library.

=head2 Tie

Use the Perl built-in B<tie> to associate a B<GDBM> database with a Perl
hash:

    tie %hash, 'GDBM_File', $filename, $flags, $mode;

Here, I<$filename> is the name of the database file to open or create.
I<$flags> is a bitwise OR of I<access mode> and optional I<modifiers>.
Access mode is one of:

=over 4

=item B<GDBM_READER>

Open existing database file in read-only mode.

=item B<GDBM_WRITER>

Open existing database file in read-write mode.

=item B<GDBM_WRCREAT>

If the database file exists, open it in read-write mode. If it doesn't,
create it first and open read-write.

=item B<GDBM_NEWDB>

Create new database and open it read-write. If the database already exists,
truncate it first.

=back

A number of modifiers can be OR'd to the access mode. Most of them are
rarely needed (see L<https://www.gnu.org.ua/software/gdbm/manual/Open.html>
for a complete list), but one is worth mentioning. The B<GDBM_NUMSYNC>
modifier, when used with B<GDBM_NEWDB>, instructs B<GDBM> to create the
database in I<extended> (so called I<numsync>) format. This format is
best suited for crash-tolerant implementations. See B<CRASH TOLERANCE>
below for more information.

The I<$mode> parameter is the file mode for creating a new database
file. Use an octal constant or a combination of C<S_I*> constants
from the B<Fcntl> module. This parameter is used if I<$flags> is
B<GDBM_NEWDB> or B<GDBM_WRCREAT>.

On success, B<tie> returns an object of class B<GDBM_File>. On failure,
it returns B<undef>. It is recommended to always check the return value,
to make sure your hash is successfully associated with the database file.
See B<ERROR HANDLING> below for examples.

=head1 STATIC METHODS

=head2 GDBM_version

    $str = GDBM_File->GDBM_version;
    @ar = GDBM_File->GDBM_version;

Returns the version number of the underlying B<libgdbm> library. In scalar
context, returns the library version formatted as string:

    MAJOR.MINOR[.PATCH][ (GUESS)]

where I<MAJOR>, I<MINOR>, and I<PATCH> are version numbers, and I<GUESS> is
a guess level (see below).

In list context, returns a list:

    ( MAJOR, MINOR, PATCH [, GUESS] )

The I<GUESS> component is present only if B<libgdbm> version is 1.8.3 or
earlier.
This is because earlier releases of B<libgdbm> did not include
information about their version and the B<GDBM_File> module has to implement
certain guesswork in order to determine it. I<GUESS> is a textual description
in string context, and a positive number indicating how rough the guess is
in list context. Possible values are:

=over 4

=item 1 - exact guess

The major and minor version numbers are guaranteed to be correct. The actual
patchlevel is most probably guessed right, but can be 1-2 less than indicated.

=item 2 - approximate

The major and minor version numbers are guaranteed to be correct. The
patchlevel is set to the upper bound.

=item 3 - rough guess

The version is guaranteed to be not newer than B<I<MAJOR>.I<MINOR>>.

=back

=head1 ERROR HANDLING

=head2 $GDBM_File::gdbm_errno

When referenced in numeric context, retrieves the current value of the
B<gdbm_errno> variable, i.e. a numeric code describing the state of the
most recent operation on any B<gdbm> database. Each numeric code has a
symbolic name associated with it. For a comprehensive list of these, see
L<https://www.gnu.org.ua/software/gdbm/manual/Error-codes.html>. Note
that this list includes all error codes defined for the most recent
version of B<gdbm>. Depending on the actual version of the library
B<GDBM_File> is built with, some of these may be missing.

In string context, B<$gdbm_errno> returns a human-readable description of
the error. If necessary, this description includes the value of B<$!>.
This makes it possible to use it in diagnostic messages.
For example, the usual tying sequence is

    tie %hash, 'GDBM_File', $filename, GDBM_WRCREAT, 0640
        or die "$GDBM_File::gdbm_errno";

The following, more complex, example illustrates how you can fall back
to read-only mode if the database file permissions forbid read-write
access:

    use Errno qw(EACCES);
    unless (tie(%hash, 'GDBM_File', $filename, GDBM_WRCREAT, 0640)) {
        if ($GDBM_File::gdbm_errno == GDBM_FILE_OPEN_ERROR
            && $!{EACCES}) {
            unless (tie(%hash, 'GDBM_File', $filename, GDBM_READER, 0640)) {
                die "$GDBM_File::gdbm_errno";
            }
        } else {
            die "$GDBM_File::gdbm_errno";
        }
    }

=head2 gdbm_check_syserr

    if (gdbm_check_syserr(gdbm_errno)) ...

Returns true if the system error number (B<$!>) gives more information on
the cause of the error.

=head1 DATABASE METHODS

=head2 close

    $db->close;

Closes the database. Normally you would just do B<untie>. However, you
will need to use this function if you have explicitly assigned the result
of B<tie> to a variable, and wish to release the database to other
users. Consider the following code:

    $db = tie %hash, 'GDBM_File', $filename, GDBM_WRCREAT, 0640;
    # Do something with %hash or $db...
    untie %hash;
    $db->close;

In this example, doing B<untie> alone is not enough, since the database
would remain referenced by B<$db>, and, as a consequence, the database file
would remain locked. Calling B<$db-E<gt>close> ensures the database file is
closed and unlocked.

=head2 errno

    $db->errno

Returns the last error status associated with this database. In string
context, returns a human-readable description of the error. See also
B<$GDBM_File::gdbm_errno> variable above.

=head2 syserrno

    $db->syserrno

Returns the last system error status (the C C<errno> variable) associated
with this database.

=head2 strerror

    $db->strerror

Returns textual description of the last error that occurred in this database.

=head2 clear_error

    $db->clear_error

Clear error status.

=head2 needs_recovery

    $db->needs_recovery

Returns true if the database needs recovery.

=head2 reorganize

    $db->reorganize;

Reorganizes the database.

=head2 sync

    $db->sync;

Synchronizes recent changes to the database with its disk copy.

=head2 count

    $n = $db->count;

Returns number of keys in the database.

=head2 flags

    $db->flags;

Returns flags passed as 4th argument to B<tie>.

=head2 dbname

    $db->dbname;

Returns the database name (i.e. 3rd argument to B<tie>).

=head2 cache_size

    $db->cache_size;
    $db->cache_size($newsize);

Returns the size of the internal B<GDBM> cache for that database.

Called with argument, sets the size to I<$newsize>.

=head2 block_size

    $db->block_size;

Returns the block size of the database.

=head2 sync_mode

    $db->sync_mode;
    $db->sync_mode($bool);

Returns the status of the automatic synchronization mode. Called with argument,
enables or disables the sync mode, depending on whether $bool is B<true> or
B<false>.

When synchronization mode is on (B<true>), any changes to the database are
immediately written to the disk. This ensures database consistency in case
of any unforeseen errors (e.g. power failures), at the expense of considerable
slowdown of operation.

Synchronization mode is off by default.

=head2 centfree

    $db->centfree;
    $db->centfree($bool);

Returns status of the central free block pool (B<0> - disabled,
B<1> - enabled).

With argument, changes its status.

By default, central free block pool is disabled.

=head2 coalesce

    $db->coalesce;
    $db->coalesce($bool);

Returns the status of free block merging (B<0> - disabled,
B<1> - enabled).

With argument, changes its status.

=head2 mmap

    $db->mmap;

Returns true if memory mapping is enabled.

This method will B<croak> if the B<libgdbm> library is compiled without
memory mapping support.

=head2 mmapsize

    $db->mmapsize;
    $db->mmapsize($newsize);

If memory mapping is enabled, returns the size of memory mapping. With
argument, sets the size to B<$newsize>.

This method will B<croak> if the B<libgdbm> library is compiled without
memory mapping support.

=head2 recover

    $db->recover(%args);

Recovers data from a failed database. B<%args> is optional and can contain
the following keys:

=over 4

=item err => sub { ... }

Reference to code for detailed error reporting. Upon encountering an error,
B<recover> will call this sub with a single argument - a description of the
error.

=item backup => \$str

Creates a backup copy of the database before recovery and returns its
filename in B<$str>.

=item max_failed_keys => $n

Maximum allowed number of failed keys. If the actual number becomes equal
to I<$n>, B<recover> aborts and returns error.

=item max_failed_buckets => $n

Maximum allowed number of failed buckets. If the actual number becomes equal
to I<$n>, B<recover> aborts and returns error.

=item max_failures => $n

Maximum allowed number of failures during recovery.

=item stat => \%hash

Return recovery statistics in I<%hash>. Upon return, the following keys will
be present:

=over 8

=item recovered_keys

Number of successfully recovered keys.

=item recovered_buckets

Number of successfully recovered buckets.

=item failed_keys

Number of keys that failed to be retrieved.

=item failed_buckets

Number of buckets that failed to be retrieved.

=back

=back

=head2 convert

    $db->convert($format);

Changes the format of the database file referred to by B<$db>.

Starting from version 1.20, B<gdbm> supports two database file formats:
I<standard> and I<extended>. The former is the traditional database
format, used by previous B<gdbm> versions. The I<extended> format contains
additional data and is recommended for use in crash tolerant applications.

See L<https://www.gnu.org.ua/software/gdbm/manual/Numsync.html> for the
discussion of both formats.

The B<$format> argument sets the new desired database format. It is
B<GDBM_NUMSYNC> to convert the database from standard to extended format, and
B<0> to convert it from extended to standard format.

If the database is already in the requested format, the function returns
success without doing anything.

=head2 dump

    $db->dump($filename, %options)

Creates a dump of the database file in I<$filename>. Such file can be used
as a backup copy or sent over a wire to recreate the database on another
machine. To create a database from the dump file, use the B<load> method.

B<GDBM> supports two dump formats: old I<binary> and new I<ascii>. The
binary format is not portable across architectures and is deprecated. It
is supported for backward compatibility. The ascii format is portable and
stores additional meta-data about the file. It was introduced with the
B<gdbm> version 1.11 and is the preferred dump format. The B<dump> method
creates ascii dumps by default.

If the named file already exists, the function will refuse to overwrite it
and will croak. If it doesn't exist, it will be created with the
mode B<0666> modified by the current B<umask>.

These defaults can be altered using the following I<%options>:

=over 4

=item B<binary> => 1

Create dump in I<binary> format.

=item B<mode> => I<MODE>

Set file mode to I<MODE>.

=item B<overwrite> => 1

Silently overwrite existing files.

=back

=head2 load

    $db->load($filename, %options)

Load the data from the dump file I<$filename> into the database I<$db>.
The file must have been previously created using the B<dump> method. File
format is recognized automatically. By default, the function will croak
if the dump contains a key that already exists in the database. It will
silently ignore the failure to restore database mode and/or ownership.
These defaults can be altered using the following I<%options>:

=over 4

=item B<replace> => 1

Replace existing keys.

=item B<restore_mode> => 0 | 1

If I<0>, don't try to restore the mode of the database file to that stored
in the dump.

=item B<restore_owner> => 0 | 1

If I<0>, don't try to restore the owner of the database file to that stored
in the dump.

=item B<strict_errors> => 1

Croak on failure to restore ownership and/or mode.

=back

The usual sequence to recreate a database from the dump file is:

    my %hash;
    my $db = tie %hash, 'GDBM_File', 'a.db', GDBM_NEWDB, 0640;
    $db->load('a.dump');

=head1 CRASH TOLERANCE

Crash tolerance is a new feature that, given appropriate support from the OS
and the filesystem, guarantees that a logically consistent recent state of the
database can be recovered following a crash, such as power outage, OS kernel
panic, or the like.

Crash tolerance support appeared in B<gdbm> version 1.21. The theory behind
it is explained in "Crashproofing the Original NoSQL Key-Value Store",
by Terence Kelly (L<https://queue.acm.org/detail.cfm?id=3487353>).
A detailed discussion of the B<gdbm> implementation is available in the
B<GDBM Manual>
(L<https://www.gnu.org.ua/software/gdbm/manual/Crash-Tolerance.html>).
The information below describes the Perl interface.

For maximum robustness, we recommend using the I<extended database format>
for crash tolerant databases. To create a database in extended format,
use the B<GDBM_NEWDB|GDBM_NUMSYNC> flags when opening the database, e.g.:

    $db = tie %hash, 'GDBM_File', $filename,
              GDBM_NEWDB|GDBM_NUMSYNC, 0640;

To convert an existing database to the extended format, use the B<convert>
method, described above, e.g.:

    $db->convert(GDBM_NUMSYNC);

=head2 crash_tolerance_status

    GDBM_File->crash_tolerance_status;

This static method returns the status of crash tolerance support. A
non-zero value means crash tolerance is compiled in and supported by
the operating system.

=head2 failure_atomic

    $db->failure_atomic($even, $odd)

Enables crash tolerance for the database B<$db>. Arguments are
the pathnames of two files that will be created and filled with
I<snapshots> of the database file. The two files must not exist
when this method is called and must reside on the same filesystem
as the database file. This filesystem must support the I<reflink>
operation
(L<https://www.gnu.org.ua/software/gdbm/manual/Filesystems-supporting-crash-tolerance.html>).

After a successful call to B<failure_atomic>, every call to the
B<$db-E<gt>sync> method will make an efficient reflink snapshot of the
database file in one of these files; consecutive calls to B<sync> alternate
between the two, hence the names.

The most recent of these files can be used to recover the database after
a crash. To select the right snapshot, use the B<latest_snapshot>
static method.

=head2 latest_snapshot

    $file = GDBM_File->latest_snapshot($even, $odd);

    ($file, $error) = GDBM_File->latest_snapshot($even, $odd);

Given the two snapshot names (the ones used previously in a call to
B<failure_atomic>), this method selects the one suitable for database
recovery, i.e. the file which contains the most recent database snapshot.

In scalar context, it returns the selected file name or B<undef> in case
of failure.

In list context, it returns a list of two elements: the file name
and status code. On success, the file name is defined and the code
is B<GDBM_SNAPSHOT_OK>. On error, the file name is B<undef>, and
the status is one of the following:

=over 4

=item GDBM_SNAPSHOT_BAD

Neither snapshot file is applicable. This means that the crash has occurred
before a call to B<failure_atomic> completed. In this case, it is best to
fall back on a safe backup copy of the data file.

=item GDBM_SNAPSHOT_ERR

A system error occurred. Examine B<$!> for details. See
L<https://www.gnu.org.ua/software/gdbm/manual/Crash-recovery.html> for
a comprehensive list of error codes and their meaning.

=item GDBM_SNAPSHOT_SAME

The file modes and modification dates of both snapshot files are exactly the
same. This can happen only for databases in standard format.

=item GDBM_SNAPSHOT_SUSPICIOUS

The I<numsync> counters of the two snapshots differ by more than one. The
most probable reason is programmer's error: the two parameters refer to
snapshots belonging to different database files.

=back

=head1 AVAILABILITY

gdbm is available from any GNU archive. The master site is
C<ftp.gnu.org>, but you are strongly urged to use one of the many
mirrors. You can obtain a list of mirror sites from
L<http://www.gnu.org/order/ftp.html>.

=head1 SECURITY AND PORTABILITY

GDBM files are not portable across platforms.
If you wish to transfer a GDBM file over the wire, dump it to a portable
format first.

B<Do not accept GDBM files from untrusted sources.>

Robustness of GDBM against corrupted databases depends highly on its
version. Versions prior to 1.15 did not implement any validity
checking, so that a corrupted or maliciously crafted database file
could cause perl to crash or even expose a security vulnerability.
Versions between 1.15 and 1.20 were progressively strengthened against
invalid inputs. Finally, version 1.21 has undergone extensive fuzz
testing which proved its ability to withstand any kind of input
without crashing.

=head1 SEE ALSO

L<perl(1)>, L<DB_File(3)>, L<perldbmfilter>,
L<gdbm(3)>,
L<https://www.gnu.org.ua/software/gdbm/manual.html>.

=cut

package GDBM_File;

use strict;
use warnings;

# Package globals: the module version, the parent class list, and the
# list of symbols exported by default.
our($VERSION, @ISA, @EXPORT);

# Carp is loaded without importing anything; no Perl-level call to it is
# visible in this file (presumably it is used from the XS side -- confirm).
require Carp;
require Tie::Hash;

# Borrow Exporter's import() instead of inheriting from Exporter, so that
# @ISA holds only the tie base class.
use Exporter 'import';
require XSLoader;
@ISA = qw(Tie::Hash);

# Everything below is exported by default: the GDBM_* flag constants, the
# GDBM_SNAPSHOT_* status codes, the GDBM error codes, and the
# gdbm_check_syserr function.  None of these names is defined in this
# file.  Do not put comments inside the qw() list -- they would become
# list elements.
@EXPORT = qw(
    GDBM_CACHESIZE
    GDBM_CENTFREE
    GDBM_COALESCEBLKS
    GDBM_FAST
    GDBM_FASTMODE
    GDBM_INSERT
    GDBM_NEWDB
    GDBM_NOLOCK
    GDBM_OPENMASK
    GDBM_READER
    GDBM_REPLACE
    GDBM_SYNC
    GDBM_SYNCMODE
    GDBM_WRCREAT
    GDBM_WRITER
    GDBM_NOMMAP
    GDBM_CLOEXEC
    GDBM_BSEXACT
    GDBM_XVERIFY
    GDBM_PREREAD
    GDBM_NUMSYNC
    GDBM_SNAPSHOT_OK
    GDBM_SNAPSHOT_BAD
    GDBM_SNAPSHOT_ERR
    GDBM_SNAPSHOT_SAME
    GDBM_SNAPSHOT_SUSPICIOUS
    GDBM_NO_ERROR
    GDBM_MALLOC_ERROR
    GDBM_BLOCK_SIZE_ERROR
    GDBM_FILE_OPEN_ERROR
    GDBM_FILE_WRITE_ERROR
    GDBM_FILE_SEEK_ERROR
    GDBM_FILE_READ_ERROR
    GDBM_BAD_MAGIC_NUMBER
    GDBM_EMPTY_DATABASE
    GDBM_CANT_BE_READER
    GDBM_CANT_BE_WRITER
    GDBM_READER_CANT_DELETE
    GDBM_READER_CANT_STORE
    GDBM_READER_CANT_REORGANIZE
    GDBM_UNKNOWN_UPDATE
    GDBM_ITEM_NOT_FOUND
    GDBM_REORGANIZE_FAILED
    GDBM_CANNOT_REPLACE
    GDBM_ILLEGAL_DATA
    GDBM_OPT_ALREADY_SET
    GDBM_OPT_ILLEGAL
    GDBM_BYTE_SWAPPED
    GDBM_BAD_FILE_OFFSET
    GDBM_BAD_OPEN_FLAGS
    GDBM_FILE_STAT_ERROR
    GDBM_FILE_EOF
    GDBM_NO_DBNAME
    GDBM_ERR_FILE_OWNER
    GDBM_ERR_FILE_MODE
    GDBM_UNKNOWN_ERROR
    GDBM_NEED_RECOVERY
    GDBM_BACKUP_FAILED
    GDBM_DIR_OVERFLOW
    GDBM_BAD_BUCKET
    GDBM_BAD_HEADER
    GDBM_BAD_AVAIL
    GDBM_BAD_HASH_TABLE
    GDBM_BAD_DIR_ENTRY
    GDBM_FILE_CLOSE_ERROR
    GDBM_FILE_SYNC_ERROR
    GDBM_FILE_TRUNCATE_ERROR
    GDBM_BUCKET_CACHE_CORRUPTED
    GDBM_BAD_HASH_ENTRY
    GDBM_MALFORMED_DATA
    GDBM_OPT_BADVAL
    GDBM_ERR_SNAPSHOT_CLONE
    GDBM_ERR_REALPATH
    GDBM_ERR_USAGE
    gdbm_check_syserr
);

# This module isn't dual life, so no need for dev version numbers.
$VERSION = '1.24';

# Declares $GDBM_File::gdbm_errno (see ERROR HANDLING in the POD above).
# Nothing in this file assigns to it; presumably the XS code gives it its
# numeric/string behaviour -- confirm in GDBM_File.xs.
our $gdbm_errno;

# Load the compiled (XS) half of the module.  Called with no arguments,
# XSLoader::load() takes the package name from the caller, so this must
# stay inside package GDBM_File.  It is kept after the $VERSION assignment
# and the $gdbm_errno declaration, as in the original statement order.
XSLoader::load();

1;