1# = PStore -- Transactional File Storage for Ruby Objects
2#
3# pstore.rb -
4#   originally by matz
5#   documentation by Kev Jackson and James Edward Gray II
6#   improved by Hongli Lai
7#
8# See PStore for documentation.
9
10require "digest/md5"
11
12#
13# PStore implements a file based persistence mechanism based on a Hash.  User
14# code can store hierarchies of Ruby objects (values) into the data store file
15# by name (keys).  An object hierarchy may be just a single object.  User code
16# may later read values back from the data store or even update data, as needed.
17#
18# The transactional behavior ensures that any changes succeed or fail together.
19# This can be used to ensure that the data store is not left in a transitory
20# state, where some values were updated but others were not.
21#
22# Behind the scenes, Ruby objects are stored to the data store file with
23# Marshal.  That carries the usual limitations.  Proc objects cannot be
24# marshalled, for example.
25#
26# == Usage example:
27#
28#  require "pstore"
29#
30#  # a mock wiki object...
31#  class WikiPage
32#    def initialize( page_name, author, contents )
33#      @page_name = page_name
34#      @revisions = Array.new
35#
36#      add_revision(author, contents)
37#    end
38#
39#    attr_reader :page_name
40#
41#    def add_revision( author, contents )
42#      @revisions << { :created  => Time.now,
43#                      :author   => author,
44#                      :contents => contents }
45#    end
46#
47#    def wiki_page_references
48#      [@page_name] + @revisions.last[:contents].scan(/\b(?:[A-Z]+[a-z]+){2,}/)
49#    end
50#
51#    # ...
52#  end
53#
54#  # create a new page...
55#  home_page = WikiPage.new( "HomePage", "James Edward Gray II",
56#                            "A page about the JoysOfDocumentation..." )
57#
58#  # then we want to update page data and the index together, or not at all...
59#  wiki = PStore.new("wiki_pages.pstore")
60#  wiki.transaction do  # begin transaction; do all of this or none of it
61#    # store page...
62#    wiki[home_page.page_name] = home_page
63#    # ensure that an index has been created...
64#    wiki[:wiki_index] ||= Array.new
65#    # update wiki index...
66#    wiki[:wiki_index].push(*home_page.wiki_page_references)
67#  end                   # commit changes to wiki data store file
68#
69#  ### Some time later... ###
70#
71#  # read wiki data...
72#  wiki.transaction(true) do  # begin read-only transaction, no changes allowed
73#    wiki.roots.each do |data_root_name|
74#      p data_root_name
75#      p wiki[data_root_name]
76#    end
77#  end
78#
79# == Transaction modes
80#
81# By default, file integrity is only ensured as long as the operating system
82# (and the underlying hardware) doesn't raise any unexpected I/O errors. If an
83# I/O error occurs while PStore is writing to its file, then the file will
84# become corrupted.
85#
86# You can prevent this by setting <em>pstore.ultra_safe = true</em>.
87# However, this results in a minor performance loss, and only works on platforms
88# that support atomic file renames. Please consult the documentation for
89# +ultra_safe+ for details.
90#
# Needless to say, if you're storing valuable data with PStore, then you should
# back up the PStore files from time to time.
93class PStore
94  RDWR_ACCESS = {mode: IO::RDWR | IO::CREAT | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze
95  RD_ACCESS = {mode: IO::RDONLY | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze
96  WR_ACCESS = {mode: IO::WRONLY | IO::CREAT | IO::TRUNC | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze
97
98  # The error type thrown by all PStore methods.
99  class Error < StandardError
100  end
101
102  # Whether PStore should do its best to prevent file corruptions, even when under
103  # unlikely-to-occur error conditions such as out-of-space conditions and other
104  # unusual OS filesystem errors. Setting this flag comes at the price in the form
105  # of a performance loss.
106  #
107  # This flag only has effect on platforms on which file renames are atomic (e.g.
108  # all POSIX platforms: Linux, MacOS X, FreeBSD, etc). The default value is false.
109  attr_accessor :ultra_safe
110
111  #
112  # To construct a PStore object, pass in the _file_ path where you would like
113  # the data to be stored.
114  #
115  # PStore objects are always reentrant. But if _thread_safe_ is set to true,
116  # then it will become thread-safe at the cost of a minor performance hit.
117  #
118  def initialize(file, thread_safe = false)
119    dir = File::dirname(file)
120    unless File::directory? dir
121      raise PStore::Error, format("directory %s does not exist", dir)
122    end
123    if File::exist? file and not File::readable? file
124      raise PStore::Error, format("file %s not readable", file)
125    end
126    @filename = file
127    @abort = false
128    @ultra_safe = false
129    @thread_safe = thread_safe
130    @lock = Mutex.new
131  end
132
133  # Raises PStore::Error if the calling code is not in a PStore#transaction.
134  def in_transaction
135    raise PStore::Error, "not in transaction" unless @lock.locked?
136  end
137  #
138  # Raises PStore::Error if the calling code is not in a PStore#transaction or
139  # if the code is in a read-only PStore#transaction.
140  #
141  def in_transaction_wr
142    in_transaction
143    raise PStore::Error, "in read-only transaction" if @rdonly
144  end
145  private :in_transaction, :in_transaction_wr
146
147  #
148  # Retrieves a value from the PStore file data, by _name_.  The hierarchy of
149  # Ruby objects stored under that root _name_ will be returned.
150  #
151  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
152  # raise PStore::Error if called at any other time.
153  #
154  def [](name)
155    in_transaction
156    @table[name]
157  end
158  #
159  # This method is just like PStore#[], save that you may also provide a
160  # _default_ value for the object.  In the event the specified _name_ is not
161  # found in the data store, your _default_ will be returned instead.  If you do
162  # not specify a default, PStore::Error will be raised if the object is not
163  # found.
164  #
165  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
166  # raise PStore::Error if called at any other time.
167  #
168  def fetch(name, default=PStore::Error)
169    in_transaction
170    unless @table.key? name
171      if default == PStore::Error
172        raise PStore::Error, format("undefined root name `%s'", name)
173      else
174        return default
175      end
176    end
177    @table[name]
178  end
179  #
180  # Stores an individual Ruby object or a hierarchy of Ruby objects in the data
181  # store file under the root _name_.  Assigning to a _name_ already in the data
182  # store clobbers the old data.
183  #
184  # == Example:
185  #
186  #  require "pstore"
187  #
188  #  store = PStore.new("data_file.pstore")
189  #  store.transaction do  # begin transaction
190  #    # load some data into the store...
191  #    store[:single_object] = "My data..."
192  #    store[:obj_heirarchy] = { "Kev Jackson" => ["rational.rb", "pstore.rb"],
193  #                              "James Gray"  => ["erb.rb", "pstore.rb"] }
194  #  end                   # commit changes to data store file
195  #
196  # *WARNING*:  This method is only valid in a PStore#transaction and it cannot
197  # be read-only.  It will raise PStore::Error if called at any other time.
198  #
199  def []=(name, value)
200    in_transaction_wr
201    @table[name] = value
202  end
203  #
204  # Removes an object hierarchy from the data store, by _name_.
205  #
206  # *WARNING*:  This method is only valid in a PStore#transaction and it cannot
207  # be read-only.  It will raise PStore::Error if called at any other time.
208  #
209  def delete(name)
210    in_transaction_wr
211    @table.delete name
212  end
213
214  #
215  # Returns the names of all object hierarchies currently in the store.
216  #
217  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
218  # raise PStore::Error if called at any other time.
219  #
220  def roots
221    in_transaction
222    @table.keys
223  end
224  #
225  # Returns true if the supplied _name_ is currently in the data store.
226  #
227  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
228  # raise PStore::Error if called at any other time.
229  #
230  def root?(name)
231    in_transaction
232    @table.key? name
233  end
234  # Returns the path to the data store file.
235  def path
236    @filename
237  end
238
239  #
240  # Ends the current PStore#transaction, committing any changes to the data
241  # store immediately.
242  #
243  # == Example:
244  #
245  #  require "pstore"
246  #
247  #  store = PStore.new("data_file.pstore")
248  #  store.transaction do  # begin transaction
249  #    # load some data into the store...
250  #    store[:one] = 1
251  #    store[:two] = 2
252  #
253  #    store.commit        # end transaction here, committing changes
254  #
255  #    store[:three] = 3   # this change is never reached
256  #  end
257  #
258  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
259  # raise PStore::Error if called at any other time.
260  #
261  def commit
262    in_transaction
263    @abort = false
264    throw :pstore_abort_transaction
265  end
266  #
267  # Ends the current PStore#transaction, discarding any changes to the data
268  # store.
269  #
270  # == Example:
271  #
272  #  require "pstore"
273  #
274  #  store = PStore.new("data_file.pstore")
275  #  store.transaction do  # begin transaction
276  #    store[:one] = 1     # this change is not applied, see below...
277  #    store[:two] = 2     # this change is not applied, see below...
278  #
279  #    store.abort         # end transaction here, discard all changes
280  #
281  #    store[:three] = 3   # this change is never reached
282  #  end
283  #
284  # *WARNING*:  This method is only valid in a PStore#transaction.  It will
285  # raise PStore::Error if called at any other time.
286  #
287  def abort
288    in_transaction
289    @abort = true
290    throw :pstore_abort_transaction
291  end
292
293  #
294  # Opens a new transaction for the data store.  Code executed inside a block
295  # passed to this method may read and write data to and from the data store
296  # file.
297  #
298  # At the end of the block, changes are committed to the data store
299  # automatically.  You may exit the transaction early with a call to either
300  # PStore#commit or PStore#abort.  See those methods for details about how
301  # changes are handled.  Raising an uncaught Exception in the block is
302  # equivalent to calling PStore#abort.
303  #
304  # If _read_only_ is set to +true+, you will only be allowed to read from the
305  # data store during the transaction and any attempts to change the data will
306  # raise a PStore::Error.
307  #
308  # Note that PStore does not support nested transactions.
309  #
310  def transaction(read_only = false)  # :yields:  pstore
311    value = nil
312    if !@thread_safe
313      raise PStore::Error, "nested transaction" unless @lock.try_lock
314    else
315      begin
316        @lock.lock
317      rescue ThreadError
318        raise PStore::Error, "nested transaction"
319      end
320    end
321    begin
322      @rdonly = read_only
323      @abort = false
324      file = open_and_lock_file(@filename, read_only)
325      if file
326        begin
327          @table, checksum, original_data_size = load_data(file, read_only)
328
329          catch(:pstore_abort_transaction) do
330            value = yield(self)
331          end
332
333          if !@abort && !read_only
334            save_data(checksum, original_data_size, file)
335          end
336        ensure
337          file.close if !file.closed?
338        end
339      else
340        # This can only occur if read_only == true.
341        @table = {}
342        catch(:pstore_abort_transaction) do
343          value = yield(self)
344        end
345      end
346    ensure
347      @lock.unlock
348    end
349    value
350  end
351
352  private
353  # Constant for relieving Ruby's garbage collector.
354  EMPTY_STRING = ""
355  EMPTY_MARSHAL_DATA = Marshal.dump({})
356  EMPTY_MARSHAL_CHECKSUM = Digest::MD5.digest(EMPTY_MARSHAL_DATA)
357
358  #
359  # Open the specified filename (either in read-only mode or in
360  # read-write mode) and lock it for reading or writing.
361  #
362  # The opened File object will be returned. If _read_only_ is true,
363  # and the file does not exist, then nil will be returned.
364  #
365  # All exceptions are propagated.
366  #
367  def open_and_lock_file(filename, read_only)
368    if read_only
369      begin
370        file = File.new(filename, RD_ACCESS)
371        begin
372          file.flock(File::LOCK_SH)
373          return file
374        rescue
375          file.close
376          raise
377        end
378      rescue Errno::ENOENT
379        return nil
380      end
381    else
382      file = File.new(filename, RDWR_ACCESS)
383      file.flock(File::LOCK_EX)
384      return file
385    end
386  end
387
388  # Load the given PStore file.
389  # If +read_only+ is true, the unmarshalled Hash will be returned.
390  # If +read_only+ is false, a 3-tuple will be returned: the unmarshalled
391  # Hash, an MD5 checksum of the data, and the size of the data.
392  def load_data(file, read_only)
393    if read_only
394      begin
395        table = load(file)
396        raise Error, "PStore file seems to be corrupted." unless table.is_a?(Hash)
397      rescue EOFError
398        # This seems to be a newly-created file.
399        table = {}
400      end
401      table
402    else
403      data = file.read
404      if data.empty?
405        # This seems to be a newly-created file.
406        table = {}
407        checksum = empty_marshal_checksum
408        size = empty_marshal_data.bytesize
409      else
410        table = load(data)
411        checksum = Digest::MD5.digest(data)
412        size = data.bytesize
413        raise Error, "PStore file seems to be corrupted." unless table.is_a?(Hash)
414      end
415      data.replace(EMPTY_STRING)
416      [table, checksum, size]
417    end
418  end
419
420  def on_windows?
421    is_windows = RUBY_PLATFORM =~ /mswin|mingw|bccwin|wince/
422    self.class.__send__(:define_method, :on_windows?) do
423      is_windows
424    end
425    is_windows
426  end
427
428  def save_data(original_checksum, original_file_size, file)
429    new_data = dump(@table)
430
431    if new_data.bytesize != original_file_size || Digest::MD5.digest(new_data) != original_checksum
432      if @ultra_safe && !on_windows?
433        # Windows doesn't support atomic file renames.
434        save_data_with_atomic_file_rename_strategy(new_data, file)
435      else
436        save_data_with_fast_strategy(new_data, file)
437      end
438    end
439
440    new_data.replace(EMPTY_STRING)
441  end
442
443  def save_data_with_atomic_file_rename_strategy(data, file)
444    temp_filename = "#{@filename}.tmp.#{Process.pid}.#{rand 1000000}"
445    temp_file = File.new(temp_filename, WR_ACCESS)
446    begin
447      temp_file.flock(File::LOCK_EX)
448      temp_file.write(data)
449      temp_file.flush
450      File.rename(temp_filename, @filename)
451    rescue
452      File.unlink(temp_file) rescue nil
453      raise
454    ensure
455      temp_file.close
456    end
457  end
458
459  def save_data_with_fast_strategy(data, file)
460    file.rewind
461    file.write(data)
462    file.truncate(data.bytesize)
463  end
464
465
466  # This method is just a wrapped around Marshal.dump
467  # to allow subclass overriding used in YAML::Store.
468  def dump(table)  # :nodoc:
469    Marshal::dump(table)
470  end
471
472  # This method is just a wrapped around Marshal.load.
473  # to allow subclass overriding used in YAML::Store.
474  def load(content)  # :nodoc:
475    Marshal::load(content)
476  end
477
478  def empty_marshal_data
479    EMPTY_MARSHAL_DATA
480  end
481  def empty_marshal_checksum
482    EMPTY_MARSHAL_CHECKSUM
483  end
484end
485
486# :enddoc:
487
if __FILE__ == $PROGRAM_NAME
  # Small smoke test, run only when this file is executed directly.
  store = PStore.new("/tmp/foo")

  # Print the existing root names, then (re)create the "root" entry.  The
  # array is modified after it has been assigned; the change is still saved
  # because the table is dumped when the transaction ends.
  store.transaction do
    p store.roots
    numbers = [1,2,3,4]
    store["root"] = numbers
    numbers[1] = [1,1.5]
  end

  # Increment the first element in 1000 separate read-write transactions,
  # printing the new value each time.
  1_000.times do
    store.transaction do
      root = store["root"]
      root[0] += 1
      p root[0]
    end
  end

  # Show the final contents using a read-only transaction.
  store.transaction(true) { p store["root"] }
end
507