1# = PStore -- Transactional File Storage for Ruby Objects 2# 3# pstore.rb - 4# originally by matz 5# documentation by Kev Jackson and James Edward Gray II 6# improved by Hongli Lai 7# 8# See PStore for documentation. 9 10require "digest/md5" 11 12# 13# PStore implements a file based persistence mechanism based on a Hash. User 14# code can store hierarchies of Ruby objects (values) into the data store file 15# by name (keys). An object hierarchy may be just a single object. User code 16# may later read values back from the data store or even update data, as needed. 17# 18# The transactional behavior ensures that any changes succeed or fail together. 19# This can be used to ensure that the data store is not left in a transitory 20# state, where some values were updated but others were not. 21# 22# Behind the scenes, Ruby objects are stored to the data store file with 23# Marshal. That carries the usual limitations. Proc objects cannot be 24# marshalled, for example. 25# 26# == Usage example: 27# 28# require "pstore" 29# 30# # a mock wiki object... 31# class WikiPage 32# def initialize( page_name, author, contents ) 33# @page_name = page_name 34# @revisions = Array.new 35# 36# add_revision(author, contents) 37# end 38# 39# attr_reader :page_name 40# 41# def add_revision( author, contents ) 42# @revisions << { :created => Time.now, 43# :author => author, 44# :contents => contents } 45# end 46# 47# def wiki_page_references 48# [@page_name] + @revisions.last[:contents].scan(/\b(?:[A-Z]+[a-z]+){2,}/) 49# end 50# 51# # ... 52# end 53# 54# # create a new page... 55# home_page = WikiPage.new( "HomePage", "James Edward Gray II", 56# "A page about the JoysOfDocumentation..." ) 57# 58# # then we want to update page data and the index together, or not at all... 59# wiki = PStore.new("wiki_pages.pstore") 60# wiki.transaction do # begin transaction; do all of this or none of it 61# # store page... 62# wiki[home_page.page_name] = home_page 63# # ensure that an index has been created... 64# wiki[:wiki_index] ||= Array.new 65# # update wiki index... 66# wiki[:wiki_index].push(*home_page.wiki_page_references) 67# end # commit changes to wiki data store file 68# 69# ### Some time later... ### 70# 71# # read wiki data... 72# wiki.transaction(true) do # begin read-only transaction, no changes allowed 73# wiki.roots.each do |data_root_name| 74# p data_root_name 75# p wiki[data_root_name] 76# end 77# end 78# 79# == Transaction modes 80# 81# By default, file integrity is only ensured as long as the operating system 82# (and the underlying hardware) doesn't raise any unexpected I/O errors. If an 83# I/O error occurs while PStore is writing to its file, then the file will 84# become corrupted. 85# 86# You can prevent this by setting <em>pstore.ultra_safe = true</em>. 87# However, this results in a minor performance loss, and only works on platforms 88# that support atomic file renames. Please consult the documentation for 89# +ultra_safe+ for details. 90# 91# Needless to say, if you're storing valuable data with PStore, then you should 92# backup the PStore files from time to time. 93class PStore 94 RDWR_ACCESS = {mode: IO::RDWR | IO::CREAT | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze 95 RD_ACCESS = {mode: IO::RDONLY | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze 96 WR_ACCESS = {mode: IO::WRONLY | IO::CREAT | IO::TRUNC | IO::BINARY, encoding: Encoding::ASCII_8BIT}.freeze 97 98 # The error type thrown by all PStore methods. 99 class Error < StandardError 100 end 101 102 # Whether PStore should do its best to prevent file corruptions, even when under 103 # unlikely-to-occur error conditions such as out-of-space conditions and other 104 # unusual OS filesystem errors. Setting this flag comes at the price in the form 105 # of a performance loss. 106 # 107 # This flag only has effect on platforms on which file renames are atomic (e.g. 108 # all POSIX platforms: Linux, MacOS X, FreeBSD, etc). The default value is false. 109 attr_accessor :ultra_safe 110 111 # 112 # To construct a PStore object, pass in the _file_ path where you would like 113 # the data to be stored. 114 # 115 # PStore objects are always reentrant. But if _thread_safe_ is set to true, 116 # then it will become thread-safe at the cost of a minor performance hit. 117 # 118 def initialize(file, thread_safe = false) 119 dir = File::dirname(file) 120 unless File::directory? dir 121 raise PStore::Error, format("directory %s does not exist", dir) 122 end 123 if File::exist? file and not File::readable? file 124 raise PStore::Error, format("file %s not readable", file) 125 end 126 @filename = file 127 @abort = false 128 @ultra_safe = false 129 @thread_safe = thread_safe 130 @lock = Mutex.new 131 end 132 133 # Raises PStore::Error if the calling code is not in a PStore#transaction. 134 def in_transaction 135 raise PStore::Error, "not in transaction" unless @lock.locked? 136 end 137 # 138 # Raises PStore::Error if the calling code is not in a PStore#transaction or 139 # if the code is in a read-only PStore#transaction. 140 # 141 def in_transaction_wr 142 in_transaction 143 raise PStore::Error, "in read-only transaction" if @rdonly 144 end 145 private :in_transaction, :in_transaction_wr 146 147 # 148 # Retrieves a value from the PStore file data, by _name_. The hierarchy of 149 # Ruby objects stored under that root _name_ will be returned. 150 # 151 # *WARNING*: This method is only valid in a PStore#transaction. It will 152 # raise PStore::Error if called at any other time. 153 # 154 def [](name) 155 in_transaction 156 @table[name] 157 end 158 # 159 # This method is just like PStore#[], save that you may also provide a 160 # _default_ value for the object. In the event the specified _name_ is not 161 # found in the data store, your _default_ will be returned instead. If you do 162 # not specify a default, PStore::Error will be raised if the object is not 163 # found. 164 # 165 # *WARNING*: This method is only valid in a PStore#transaction. It will 166 # raise PStore::Error if called at any other time. 167 # 168 def fetch(name, default=PStore::Error) 169 in_transaction 170 unless @table.key? name 171 if default == PStore::Error 172 raise PStore::Error, format("undefined root name `%s'", name) 173 else 174 return default 175 end 176 end 177 @table[name] 178 end 179 # 180 # Stores an individual Ruby object or a hierarchy of Ruby objects in the data 181 # store file under the root _name_. Assigning to a _name_ already in the data 182 # store clobbers the old data. 183 # 184 # == Example: 185 # 186 # require "pstore" 187 # 188 # store = PStore.new("data_file.pstore") 189 # store.transaction do # begin transaction 190 # # load some data into the store... 191 # store[:single_object] = "My data..." 192 # store[:obj_heirarchy] = { "Kev Jackson" => ["rational.rb", "pstore.rb"], 193 # "James Gray" => ["erb.rb", "pstore.rb"] } 194 # end # commit changes to data store file 195 # 196 # *WARNING*: This method is only valid in a PStore#transaction and it cannot 197 # be read-only. It will raise PStore::Error if called at any other time. 198 # 199 def []=(name, value) 200 in_transaction_wr 201 @table[name] = value 202 end 203 # 204 # Removes an object hierarchy from the data store, by _name_. 205 # 206 # *WARNING*: This method is only valid in a PStore#transaction and it cannot 207 # be read-only. It will raise PStore::Error if called at any other time. 208 # 209 def delete(name) 210 in_transaction_wr 211 @table.delete name 212 end 213 214 # 215 # Returns the names of all object hierarchies currently in the store. 216 # 217 # *WARNING*: This method is only valid in a PStore#transaction. It will 218 # raise PStore::Error if called at any other time. 219 # 220 def roots 221 in_transaction 222 @table.keys 223 end 224 # 225 # Returns true if the supplied _name_ is currently in the data store. 226 # 227 # *WARNING*: This method is only valid in a PStore#transaction. It will 228 # raise PStore::Error if called at any other time. 229 # 230 def root?(name) 231 in_transaction 232 @table.key? name 233 end 234 # Returns the path to the data store file. 235 def path 236 @filename 237 end 238 239 # 240 # Ends the current PStore#transaction, committing any changes to the data 241 # store immediately. 242 # 243 # == Example: 244 # 245 # require "pstore" 246 # 247 # store = PStore.new("data_file.pstore") 248 # store.transaction do # begin transaction 249 # # load some data into the store... 250 # store[:one] = 1 251 # store[:two] = 2 252 # 253 # store.commit # end transaction here, committing changes 254 # 255 # store[:three] = 3 # this change is never reached 256 # end 257 # 258 # *WARNING*: This method is only valid in a PStore#transaction. It will 259 # raise PStore::Error if called at any other time. 260 # 261 def commit 262 in_transaction 263 @abort = false 264 throw :pstore_abort_transaction 265 end 266 # 267 # Ends the current PStore#transaction, discarding any changes to the data 268 # store. 269 # 270 # == Example: 271 # 272 # require "pstore" 273 # 274 # store = PStore.new("data_file.pstore") 275 # store.transaction do # begin transaction 276 # store[:one] = 1 # this change is not applied, see below... 277 # store[:two] = 2 # this change is not applied, see below... 278 # 279 # store.abort # end transaction here, discard all changes 280 # 281 # store[:three] = 3 # this change is never reached 282 # end 283 # 284 # *WARNING*: This method is only valid in a PStore#transaction. It will 285 # raise PStore::Error if called at any other time. 286 # 287 def abort 288 in_transaction 289 @abort = true 290 throw :pstore_abort_transaction 291 end 292 293 # 294 # Opens a new transaction for the data store. Code executed inside a block 295 # passed to this method may read and write data to and from the data store 296 # file. 297 # 298 # At the end of the block, changes are committed to the data store 299 # automatically. You may exit the transaction early with a call to either 300 # PStore#commit or PStore#abort. See those methods for details about how 301 # changes are handled. Raising an uncaught Exception in the block is 302 # equivalent to calling PStore#abort. 303 # 304 # If _read_only_ is set to +true+, you will only be allowed to read from the 305 # data store during the transaction and any attempts to change the data will 306 # raise a PStore::Error. 307 # 308 # Note that PStore does not support nested transactions. 309 # 310 def transaction(read_only = false) # :yields: pstore 311 value = nil 312 if !@thread_safe 313 raise PStore::Error, "nested transaction" unless @lock.try_lock 314 else 315 begin 316 @lock.lock 317 rescue ThreadError 318 raise PStore::Error, "nested transaction" 319 end 320 end 321 begin 322 @rdonly = read_only 323 @abort = false 324 file = open_and_lock_file(@filename, read_only) 325 if file 326 begin 327 @table, checksum, original_data_size = load_data(file, read_only) 328 329 catch(:pstore_abort_transaction) do 330 value = yield(self) 331 end 332 333 if !@abort && !read_only 334 save_data(checksum, original_data_size, file) 335 end 336 ensure 337 file.close if !file.closed? 338 end 339 else 340 # This can only occur if read_only == true. 341 @table = {} 342 catch(:pstore_abort_transaction) do 343 value = yield(self) 344 end 345 end 346 ensure 347 @lock.unlock 348 end 349 value 350 end 351 352 private 353 # Constant for relieving Ruby's garbage collector. 354 EMPTY_STRING = "" 355 EMPTY_MARSHAL_DATA = Marshal.dump({}) 356 EMPTY_MARSHAL_CHECKSUM = Digest::MD5.digest(EMPTY_MARSHAL_DATA) 357 358 # 359 # Open the specified filename (either in read-only mode or in 360 # read-write mode) and lock it for reading or writing. 361 # 362 # The opened File object will be returned. If _read_only_ is true, 363 # and the file does not exist, then nil will be returned. 364 # 365 # All exceptions are propagated. 366 # 367 def open_and_lock_file(filename, read_only) 368 if read_only 369 begin 370 file = File.new(filename, RD_ACCESS) 371 begin 372 file.flock(File::LOCK_SH) 373 return file 374 rescue 375 file.close 376 raise 377 end 378 rescue Errno::ENOENT 379 return nil 380 end 381 else 382 file = File.new(filename, RDWR_ACCESS) 383 file.flock(File::LOCK_EX) 384 return file 385 end 386 end 387 388 # Load the given PStore file. 389 # If +read_only+ is true, the unmarshalled Hash will be returned. 390 # If +read_only+ is false, a 3-tuple will be returned: the unmarshalled 391 # Hash, an MD5 checksum of the data, and the size of the data. 392 def load_data(file, read_only) 393 if read_only 394 begin 395 table = load(file) 396 raise Error, "PStore file seems to be corrupted." unless table.is_a?(Hash) 397 rescue EOFError 398 # This seems to be a newly-created file. 399 table = {} 400 end 401 table 402 else 403 data = file.read 404 if data.empty? 405 # This seems to be a newly-created file. 406 table = {} 407 checksum = empty_marshal_checksum 408 size = empty_marshal_data.bytesize 409 else 410 table = load(data) 411 checksum = Digest::MD5.digest(data) 412 size = data.bytesize 413 raise Error, "PStore file seems to be corrupted." unless table.is_a?(Hash) 414 end 415 data.replace(EMPTY_STRING) 416 [table, checksum, size] 417 end 418 end 419 420 def on_windows? 421 is_windows = RUBY_PLATFORM =~ /mswin|mingw|bccwin|wince/ 422 self.class.__send__(:define_method, :on_windows?) do 423 is_windows 424 end 425 is_windows 426 end 427 428 def save_data(original_checksum, original_file_size, file) 429 new_data = dump(@table) 430 431 if new_data.bytesize != original_file_size || Digest::MD5.digest(new_data) != original_checksum 432 if @ultra_safe && !on_windows? 433 # Windows doesn't support atomic file renames. 434 save_data_with_atomic_file_rename_strategy(new_data, file) 435 else 436 save_data_with_fast_strategy(new_data, file) 437 end 438 end 439 440 new_data.replace(EMPTY_STRING) 441 end 442 443 def save_data_with_atomic_file_rename_strategy(data, file) 444 temp_filename = "#{@filename}.tmp.#{Process.pid}.#{rand 1000000}" 445 temp_file = File.new(temp_filename, WR_ACCESS) 446 begin 447 temp_file.flock(File::LOCK_EX) 448 temp_file.write(data) 449 temp_file.flush 450 File.rename(temp_filename, @filename) 451 rescue 452 File.unlink(temp_file) rescue nil 453 raise 454 ensure 455 temp_file.close 456 end 457 end 458 459 def save_data_with_fast_strategy(data, file) 460 file.rewind 461 file.write(data) 462 file.truncate(data.bytesize) 463 end 464 465 466 # This method is just a wrapped around Marshal.dump 467 # to allow subclass overriding used in YAML::Store. 468 def dump(table) # :nodoc: 469 Marshal::dump(table) 470 end 471 472 # This method is just a wrapped around Marshal.load. 473 # to allow subclass overriding used in YAML::Store. 474 def load(content) # :nodoc: 475 Marshal::load(content) 476 end 477 478 def empty_marshal_data 479 EMPTY_MARSHAL_DATA 480 end 481 def empty_marshal_checksum 482 EMPTY_MARSHAL_CHECKSUM 483 end 484end 485 486# :enddoc: 487 488if __FILE__ == $0 489 db = PStore.new("/tmp/foo") 490 db.transaction do 491 p db.roots 492 ary = db["root"] = [1,2,3,4] 493 ary[1] = [1,1.5] 494 end 495 496 1000.times do 497 db.transaction do 498 db["root"][0] += 1 499 p db["root"][0] 500 end 501 end 502 503 db.transaction(true) do 504 p db["root"] 505 end 506end 507