1// Copyright 2016 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// This file describes the in-memory structures which construct
6// a MinFS filesystem.
7
8#pragma once
9
10#include <inttypes.h>
11
12#ifdef __Fuchsia__
13#include <fbl/auto_lock.h>
14#include <fs/managed-vfs.h>
15#include <fs/remote.h>
16#include <fs/watcher.h>
17#include <fuchsia/io/c/fidl.h>
18#include <fuchsia/minfs/c/fidl.h>
19#include <lib/sync/completion.h>
20#include <lib/zx/vmo.h>
21#endif
22
23#include <fbl/algorithm.h>
24#include <fbl/function.h>
25#include <fbl/intrusive_hash_table.h>
26#include <fbl/intrusive_single_list.h>
27#include <fbl/macros.h>
28#include <fbl/ref_ptr.h>
29#include <fbl/unique_ptr.h>
30#include <fs/block-txn.h>
31#include <fs/locking.h>
32#include <fs/ticker.h>
33#include <fs/trace.h>
34#include <fs/vfs.h>
35#include <fs/vnode.h>
36#include <lib/fzl/mapped-vmo.h>
37#include <lib/zircon-internal/fnv1hash.h>
38#include <minfs/allocator.h>
39#include <minfs/format.h>
40#include <minfs/inode-manager.h>
41#include <minfs/superblock.h>
42#include <minfs/writeback.h>
43
44#define EXTENT_COUNT 5
45
46// A compile-time debug check, which, if enabled, causes
47// inline functions to be expanded to error checking code.
48// Since this may be expensive, it is typically turned
49// off, except for debugging.
50// #define MINFS_PARANOID_MODE
51
52namespace minfs {
53
54#ifdef __Fuchsia__
55using RawBitmap = bitmap::RawBitmapGeneric<bitmap::VmoStorage>;
56#else
57using RawBitmap = bitmap::RawBitmapGeneric<bitmap::DefaultStorage>;
58#endif
59
60#ifdef __Fuchsia__
61// Validate that |vmo| is large enough to access block |blk|,
62// relative to the start of the vmo.
63inline void validate_vmo_size(zx_handle_t vmo, blk_t blk) {
64#ifdef MINFS_PARANOID_MODE
65    uint64_t size;
66    size_t min = (blk + 1) * kMinfsBlockSize;
67    ZX_ASSERT(zx_vmo_get_size(vmo, &size) == ZX_OK);
68    ZX_ASSERT_MSG(size >= min, "VMO size %" PRIu64 " too small for access at block %u\n",
69                  size, blk);
70#endif // MINFS_PARANOID_MODE
71}
72#endif // __Fuchsia__
73
// Flags accepted by minfs_sync_vnode. Every non-default flag occupies its own bit.
constexpr uint32_t kMxFsSyncDefault = 0u;    // Default: no implicit time update.
constexpr uint32_t kMxFsSyncMtime = 1u << 0; // Bit 0 (presumably: also update mtime).
constexpr uint32_t kMxFsSyncCtime = 1u << 1; // Bit 1 (presumably: also update ctime).

// NOTE(review): the unit (blocks vs. entries) is not visible in this header.
constexpr uint32_t kMinfsBlockCacheSize = 64u;
80
81// Used by fsck
82class MinfsChecker;
83class VnodeMinfs;
84
85using SyncCallback = fs::Vnode::SyncCallback;
86
87#ifndef __Fuchsia__
88// Store start block + length for all extents. These may differ from info block for
89// sparse files.
90class BlockOffsets {
91public:
92    BlockOffsets(const Bcache* bc, const Superblock* sb);
93
94    blk_t IbmStartBlock() const { return ibm_start_block_; }
95    blk_t IbmBlockCount() const { return ibm_block_count_; }
96
97    blk_t AbmStartBlock() const { return abm_start_block_; }
98    blk_t AbmBlockCount() const { return abm_block_count_; }
99
100    blk_t InoStartBlock() const { return ino_start_block_; }
101    blk_t InoBlockCount() const { return ino_block_count_; }
102
103    blk_t DatStartBlock() const { return dat_start_block_; }
104    blk_t DatBlockCount() const { return dat_block_count_; }
105
106private:
107    blk_t ibm_start_block_;
108    blk_t ibm_block_count_;
109
110    blk_t abm_start_block_;
111    blk_t abm_block_count_;
112
113    blk_t ino_start_block_;
114    blk_t ino_block_count_;
115
116    blk_t dat_start_block_;
117    blk_t dat_block_count_;
118};
119#endif
120
121class Minfs :
122#ifdef __Fuchsia__
123    public fs::ManagedVfs,
124#else
125    public fs::Vfs,
126#endif
127    public fbl::RefCounted<Minfs> {
128public:
129    DISALLOW_COPY_ASSIGN_AND_MOVE(Minfs);
130
131    ~Minfs();
132
133    static zx_status_t Create(fbl::unique_ptr<Bcache> bc, const minfs_info_t* info,
134                              fbl::unique_ptr<Minfs>* out);
135
136    // instantiate a vnode from an inode
137    // the inode must exist in the file system
138    zx_status_t VnodeGet(fbl::RefPtr<VnodeMinfs>* out, ino_t ino);
139
140    // instantiate a vnode with a new inode
141    zx_status_t VnodeNew(Transaction* state, fbl::RefPtr<VnodeMinfs>* out, uint32_t type);
142
143    // Insert, lookup, and remove vnode from hash map.
144    void VnodeInsert(VnodeMinfs* vn) FS_TA_EXCLUDES(hash_lock_);
145    fbl::RefPtr<VnodeMinfs> VnodeLookup(uint32_t ino) FS_TA_EXCLUDES(hash_lock_);
146    void VnodeRelease(VnodeMinfs* vn) FS_TA_EXCLUDES(hash_lock_);
147
148    // Allocate a new data block.
149    void BlockNew(Transaction* state, blk_t* out_bno);
150
151    // Free a data block.
152    void BlockFree(WriteTxn* txn, blk_t bno);
153
154    // Queries the underlying FVM, if it exists.
155    zx_status_t FVMQuery(fvm_info_t* info) const;
156
157    // Free ino in inode bitmap, release all blocks held by inode.
158    zx_status_t InoFree(VnodeMinfs* vn, WritebackWork* wb);
159
160    // Writes back an inode into the inode table on persistent storage.
161    // Does not modify inode bitmap.
162    void InodeUpdate(WriteTxn* txn, ino_t ino, const minfs_inode_t* inode) {
163        inodes_->Update(txn, ino, inode);
164    }
165
166    // Reads an inode from the inode table into memory.
167    void InodeLoad(ino_t ino, minfs_inode_t* out) const {
168        inodes_->Load(ino, out);
169    }
170
171    void ValidateBno(blk_t bno) const {
172        ZX_DEBUG_ASSERT(bno != 0);
173        ZX_DEBUG_ASSERT(bno < Info().block_count);
174    }
175
176    zx_status_t BeginTransaction(size_t reserve_inodes, size_t reserve_blocks,
177                                 fbl::unique_ptr<Transaction>* out);
178
179    void CommitTransaction(fbl::unique_ptr<Transaction> state) {
180        // On enqueue, unreserve any remaining reserved blocks/inodes tracked by work.
181#ifdef __Fuchsia__
182        writeback_->Enqueue(state->RemoveWork());
183#else
184        state->GetWork()->Complete();
185#endif
186    }
187
188#ifdef __Fuchsia__
189    void SetUnmountCallback(fbl::Closure closure) { on_unmount_ = fbl::move(closure); }
190    void Shutdown(fs::Vfs::ShutdownCallback cb) final;
191
192    // Returns a unique identifier for this instance.
193    uint64_t GetFsId() const { return fs_id_; }
194
195    // Signals the completion object as soon as...
196    // (1) A sync probe has entered and exited the writeback queue, and
197    // (2) The block cache has sync'd with the underlying block device.
198    void Sync(SyncCallback closure);
199#endif
200
201    // The following methods are used to read one block from the specified extent,
202    // from relative block |bno|.
203    // |data| is an out parameter that must be a block in size, provided by the caller
204    // These functions are single-block and synchronous. On Fuchsia, using the batched read
205    // functions is preferred.
206    zx_status_t ReadDat(blk_t bno, void* data);
207
208    void SetMetrics(bool enable) { collecting_metrics_ = enable; }
209    fs::Ticker StartTicker() { return fs::Ticker(collecting_metrics_); }
210
211    // Update aggregate information about VMO initialization.
212    void UpdateInitMetrics(uint32_t dnum_count, uint32_t inum_count,
213                           uint32_t dinum_count, uint64_t user_data_size,
214                           const fs::Duration& duration);
215    // Update aggregate information about looking up vnodes by name.
216    void UpdateLookupMetrics(bool success, const fs::Duration& duration);
217    // Update aggregate information about looking up vnodes by inode.
218    void UpdateOpenMetrics(bool cache_hit, const fs::Duration& duration);
219    // Update aggregate information about inode creation.
220    void UpdateCreateMetrics(bool success, const fs::Duration& duration);
221    // Update aggregate information about reading from Vnodes.
222    void UpdateReadMetrics(uint64_t size, const fs::Duration& duration);
223    // Update aggregate information about writing to Vnodes.
224    void UpdateWriteMetrics(uint64_t size, const fs::Duration& duration);
225    // Update aggregate information about truncating Vnodes.
226    void UpdateTruncateMetrics(const fs::Duration& duration);
227    // Update aggregate information about unlinking Vnodes.
228    void UpdateUnlinkMetrics(bool success, const fs::Duration& duration);
229    // Update aggregate information about renaming Vnodes.
230    void UpdateRenameMetrics(bool success, const fs::Duration& duration);
231
232#ifdef __Fuchsia__
233    // Acquire a copy of the collected metrics.
234    zx_status_t GetMetrics(fuchsia_minfs_Metrics* out) const {
235        if (collecting_metrics_) {
236            memcpy(out, &metrics_, sizeof(metrics_));
237            return ZX_OK;
238        }
239        return ZX_ERR_UNAVAILABLE;
240    }
241#endif
242
243    // Return an immutable reference to a copy of the internal info.
244    const minfs_info_t& Info() const {
245        return sb_->Info();
246    }
247
248    // TODO(rvargas): Make private.
249    fbl::unique_ptr<Bcache> bc_;
250
251private:
252    // Fsck can introspect Minfs
253    friend class MinfsChecker;
254    using HashTable = fbl::HashTable<ino_t, VnodeMinfs*>;
255
256#ifdef __Fuchsia__
257    Minfs(fbl::unique_ptr<Bcache> bc, fbl::unique_ptr<Superblock> sb,
258          fbl::unique_ptr<Allocator> block_allocator,
259          fbl::unique_ptr<InodeManager> inodes,
260          fbl::unique_ptr<WritebackBuffer> writeback,
261          uint64_t fs_id);
262#else
263    Minfs(fbl::unique_ptr<Bcache> bc, fbl::unique_ptr<Superblock> sb,
264          fbl::unique_ptr<Allocator> block_allocator,
265          fbl::unique_ptr<InodeManager> inodes, BlockOffsets offsets);
266#endif
267
268    // Find a free inode, allocate it in the inode bitmap, and write it back to disk
269    void InoNew(Transaction* state, const minfs_inode_t* inode, ino_t* out_ino);
270
271    // Enqueues an update to the super block.
272    void WriteInfo(WriteTxn* txn);
273
274    // Creates an unique identifier for this instance. This is to be called only during
275    // "construction".
276    static zx_status_t CreateFsId(uint64_t* out);
277
278#ifndef __Fuchsia__
279    zx_status_t ReadBlk(blk_t bno, blk_t start, blk_t soft_max, blk_t hard_max, void* data);
280#endif
281
282    // Global information about the filesystem.
283    fbl::unique_ptr<Superblock> sb_;
284    fbl::unique_ptr<Allocator> block_allocator_;
285    fbl::unique_ptr<InodeManager> inodes_;
286
287    // Vnodes exist in the hash table as long as one or more reference exists;
288    // when the Vnode is deleted, it is immediately removed from the map.
289#ifdef __Fuchsia__
290    fbl::Mutex hash_lock_;
291#endif
292    HashTable vnode_hash_ FS_TA_GUARDED(hash_lock_){};
293
294    bool collecting_metrics_ = false;
295#ifdef __Fuchsia__
296    fbl::Closure on_unmount_{};
297    fuchsia_minfs_Metrics metrics_ = {};
298    fbl::unique_ptr<WritebackBuffer> writeback_;
299    uint64_t fs_id_{};
300#else
301    // Store start block + length for all extents. These may differ from info block for
302    // sparse files.
303    BlockOffsets offsets_;
304#endif
305};
306
// Pair of offsets describing a position within a directory: the record currently
// being looked at and the record that precedes it. Both start out at zero.
struct DirectoryOffset {
    // Offset in directory of current record.
    size_t off{0};
    // Offset in directory of previous record.
    size_t off_prev{0};
};
311
// Argument bundle handed to VnodeMinfs::ForEachDirent and forwarded, unchanged, to
// the directory callback invoked for each entry.
//
// NOTE(review): the per-field notes below are inferred from the field names and
// types only; which fields are inputs and which are outputs depends on the
// callback in use — confirm against the callback implementations.
struct DirArgs {
    fbl::StringPiece name; // presumably the entry name being searched for / created
    ino_t ino;             // presumably the inode number associated with |name|
    uint32_t type;         // presumably the entry type
    uint32_t reclen;       // presumably the record length of the directory entry
    Transaction* state;    // transaction associated with the operation
    DirectoryOffset offs;  // offsets of the current and previous records
};
320
321class VnodeMinfs final : public fs::Vnode,
322                         public fbl::SinglyLinkedListable<VnodeMinfs*>,
323                         public fbl::Recyclable<VnodeMinfs> {
324public:
325    ~VnodeMinfs();
326
327    // Allocates a new Vnode and initializes the in-memory inode structure given the type, where
328    // type is one of:
329    // - kMinfsTypeFile
330    // - kMinfsTypeDir
331    //
332    // Sets create / modify times of the new node.
333    // Does not allocate an inode number for the Vnode.
334    static void Allocate(Minfs* fs, uint32_t type, fbl::RefPtr<VnodeMinfs>* out);
335
336    // Allocates a Vnode, loading |ino| from storage.
337    //
338    // Doesn't update create / modify times of the node.
339    static zx_status_t Recreate(Minfs* fs, ino_t ino, fbl::RefPtr<VnodeMinfs>* out);
340
341    bool IsDirectory() const { return inode_.magic == kMinfsMagicDir; }
342    bool IsUnlinked() const { return inode_.link_count == 0; }
343    zx_status_t CanUnlink() const;
344
345    const minfs_inode_t* GetInode() const { return &inode_; }
346
347    ino_t GetKey() const { return ino_; }
348    // Should only be called once for the VnodeMinfs lifecycle.
349    void SetIno(ino_t ino);
350    static size_t GetHash(ino_t key) { return fnv1a_tiny(key, kMinfsHashBits); }
351
352    // fs::Vnode interface (invoked publicly).
353#ifdef __Fuchsia__
354    zx_status_t Serve(fs::Vfs* vfs, zx::channel channel, uint32_t flags) final;
355#endif
356    zx_status_t Open(uint32_t flags, fbl::RefPtr<Vnode>* out_redirect) final;
357    zx_status_t Close() final;
358
359    // fbl::Recyclable interface.
360    void fbl_recycle() final;
361
362#ifdef __Fuchsia__
363    // Minfs FIDL interface.
364    zx_status_t GetMetrics(fidl_txn_t* txn);
365    zx_status_t ToggleMetrics(bool enabled, fidl_txn_t* txn);
366#endif
367
368    // TODO(rvargas): Make private.
369    Minfs* const fs_;
370
371private:
372    // Fsck can introspect Minfs
373    friend class MinfsChecker;
374    friend zx_status_t Minfs::InoFree(VnodeMinfs* vn, WritebackWork* wb);
375
376    VnodeMinfs(Minfs* fs);
377
378    // fs::Vnode interface.
379    zx_status_t ValidateFlags(uint32_t flags) final;
380    zx_status_t Lookup(fbl::RefPtr<fs::Vnode>* out, fbl::StringPiece name) final;
381    zx_status_t Read(void* data, size_t len, size_t off, size_t* out_actual) final;
382    zx_status_t Write(const void* data, size_t len, size_t offset,
383                      size_t* out_actual) final;
384    zx_status_t Append(const void* data, size_t len, size_t* out_end,
385                       size_t* out_actual) final;
386    zx_status_t Getattr(vnattr_t* a) final;
387    zx_status_t Setattr(const vnattr_t* a) final;
388    zx_status_t Readdir(fs::vdircookie_t* cookie, void* dirents, size_t len,
389                        size_t* out_actual) final;
390    zx_status_t Create(fbl::RefPtr<fs::Vnode>* out, fbl::StringPiece name,
391                       uint32_t mode) final;
392    zx_status_t Unlink(fbl::StringPiece name, bool must_be_dir) final;
393    zx_status_t Rename(fbl::RefPtr<fs::Vnode> newdir,
394                       fbl::StringPiece oldname, fbl::StringPiece newname,
395                       bool src_must_be_dir, bool dst_must_be_dir) final;
396    zx_status_t Link(fbl::StringPiece name, fbl::RefPtr<fs::Vnode> target) final;
397    zx_status_t Truncate(size_t len) final;
398#ifdef __Fuchsia__
399    zx_status_t QueryFilesystem(fuchsia_io_FilesystemInfo* out) final;
400    zx_status_t GetDevicePath(size_t buffer_len, char* out_name, size_t* out_len) final;
401#endif
402
403    // Internal functions
404    zx_status_t ReadInternal(void* data, size_t len, size_t off, size_t* actual);
405    zx_status_t ReadExactInternal(void* data, size_t len, size_t off);
406    zx_status_t WriteInternal(Transaction* state, const void* data, size_t len,
407                              size_t off, size_t* actual);
408    zx_status_t WriteExactInternal(Transaction* state, const void* data, size_t len,
409                                   size_t off);
410    zx_status_t TruncateInternal(Transaction* state, size_t len);
411    // Lookup which can traverse '..'
412    zx_status_t LookupInternal(fbl::RefPtr<fs::Vnode>* out, fbl::StringPiece name);
413
414    // Verify that the 'newdir' inode is not a subdirectory of this Vnode.
415    // Traces the path from newdir back to the root inode.
416    zx_status_t CheckNotSubdirectory(fbl::RefPtr<VnodeMinfs> newdir);
417
418    using DirentCallback = zx_status_t (*)(fbl::RefPtr<VnodeMinfs>,
419                                           minfs_dirent_t*, DirArgs*);
420
421    // Enumerates directories.
422    zx_status_t ForEachDirent(DirArgs* args, const DirentCallback func);
423
424    // Directory callback functions.
425    //
426    // The following functions are passable to |ForEachDirent|, which reads the parent directory,
427    // one dirent at a time, and passes each entry to the callback function, along with the DirArgs
428    // information passed to the initial call of |ForEachDirent|.
429    static zx_status_t DirentCallbackFind(fbl::RefPtr<VnodeMinfs>, minfs_dirent_t*, DirArgs*);
430    static zx_status_t DirentCallbackUnlink(fbl::RefPtr<VnodeMinfs>, minfs_dirent_t*, DirArgs*);
431    static zx_status_t DirentCallbackForceUnlink(fbl::RefPtr<VnodeMinfs>, minfs_dirent_t*,
432                                                 DirArgs*);
433    static zx_status_t DirentCallbackAttemptRename(fbl::RefPtr<VnodeMinfs>, minfs_dirent_t*,
434                                                   DirArgs*);
435    static zx_status_t DirentCallbackUpdateInode(fbl::RefPtr<VnodeMinfs>, minfs_dirent_t*,
436                                                 DirArgs*);
437    static zx_status_t DirentCallbackFindSpace(fbl::RefPtr<VnodeMinfs>, minfs_dirent_t*, DirArgs*);
438
439    // Appends a new directory at the specified offset within |args|. This requires a prior call to
440    // DirentCallbackFindSpace to find an offset where there is space for the direntry. It takes
441    // the same |args| that were passed into DirentCallbackFindSpace.
442    zx_status_t AppendDirent(DirArgs* args);
443
444    zx_status_t UnlinkChild(Transaction* state, fbl::RefPtr<VnodeMinfs> child,
445                            minfs_dirent_t* de, DirectoryOffset* offs);
446    // Remove the link to a vnode (referring to inodes exclusively).
447    // Has no impact on direntries (or parent inode).
448    void RemoveInodeLink(WritebackWork* wb);
449
450    // Although file sizes don't need to be block-aligned, the underlying VMO is
451    // always kept at a size which is a multiple of |kMinfsBlockSize|.
452    //
453    // When a Vnode is truncated to a size larger than |inode_.size|, it is
454    // assumed that any space between |inode_.size| and the nearest block is
455    // filled with zeroes in the internal VMO. This function validates that
456    // assumption.
457    inline void ValidateVmoTail() const {
458#if defined(MINFS_PARANOID_MODE) && defined(__Fuchsia__)
459        if (!vmo_.is_valid()) {
460            return;
461        }
462
463        // Verify that everything not allocated to "inode_.size" in the
464        // last block is filled with zeroes.
465        char buf[kMinfsBlockSize];
466        const size_t vmo_size = fbl::round_up(inode_.size, kMinfsBlockSize);
467        ZX_ASSERT(vmo_.read(buf, inode_.size, vmo_size - inode_.size) == ZX_OK);
468        for (size_t i = 0; i < vmo_size - inode_.size; i++) {
469            ZX_ASSERT_MSG(buf[i] == 0, "vmo[%" PRIu64 "] != 0 (inode size = %u)\n",
470                          inode_.size + i, inode_.size);
471        }
472#endif  // MINFS_PARANOID_MODE && __Fuchsia__
473    }
474
475    typedef enum {
476        READ,
477        WRITE,
478        DELETE,
479    } blk_op_t;
480
481    typedef struct bop_params {
482        bop_params(blk_t start, blk_t count, blk_t* bnos)
483            : start(start), count(count), bnos(bnos) {
484                // Initialize output array to 0 in case the indirect block(s) containing these bnos
485                // do not exist
486                if (bnos) {
487                    memset(bnos, 0, sizeof(blk_t) * count);
488                }
489            }
490
491        blk_t start;
492        blk_t count;
493        blk_t* bnos;
494    } bop_params_t;
495
496    class DirectArgs {
497    public:
498        DirectArgs(blk_op_t op, blk_t* array, blk_t count, blk_t* bnos)
499            : op_(op), array_(array), count_(count), bnos_(bnos), dirty_(false) {}
500
501        blk_op_t GetOp() const { return op_; }
502        blk_t GetBno(blk_t index) const { return array_[index]; }
503        void SetBno(blk_t index, blk_t value) {
504            ZX_DEBUG_ASSERT(index < GetCount());
505
506            if (bnos_ != nullptr) {
507                bnos_[index] = value ? value : array_[index];
508            }
509
510            if (array_[index] != value) {
511                array_[index] = value;
512                dirty_ = true;
513            }
514        }
515
516        blk_t GetCount() const { return count_; }
517
518        bool IsDirty() const { return dirty_; }
519    protected:
520        const blk_op_t op_; // determines what operation to perform on blocks
521        blk_t* const array_; // array containing blocks to be operated on
522        const blk_t count_; // number of direct blocks to operate on
523        blk_t* const bnos_; // array of |count| bnos returned to the user
524        bool dirty_; // true if blocks have successfully been op'd
525    };
526
527    class IndirectArgs : public DirectArgs {
528    public:
529        IndirectArgs(blk_op_t op, blk_t* array, blk_t count, blk_t* bnos, blk_t bindex,
530                     blk_t ib_vmo_offset)
531            : DirectArgs(op, array, count, bnos), bindex_(bindex), ib_vmo_offset_(ib_vmo_offset) {}
532
533        void SetDirty() { dirty_ = true; }
534
535        void SetBno(blk_t index, blk_t value) {
536            ZX_DEBUG_ASSERT(index < GetCount());
537            array_[index] = value;
538            SetDirty();
539        }
540
541        // Number of indirect blocks we need to iterate through to touch all |count| direct blocks.
542        blk_t GetCount() const {
543            return (bindex_ + count_ + kMinfsDirectPerIndirect - 1) / kMinfsDirectPerIndirect;
544        }
545
546        blk_t GetOffset() const { return ib_vmo_offset_; }
547
548        // Generate parameters for direct blocks in indirect block |ibindex|, which are contained
549        // in |barray|
550        DirectArgs GetDirect(blk_t* barray, unsigned ibindex) const;
551
552    protected:
553        const blk_t bindex_; // relative index of the first direct block within the first indirect
554                            // block
555        const blk_t ib_vmo_offset_; // index of the first indirect block
556    };
557
558    class DindirectArgs : public IndirectArgs {
559    public:
560        DindirectArgs(blk_op_t op, blk_t* array, blk_t count, blk_t* bnos, blk_t bindex,
561                      blk_t ib_vmo_offset, blk_t ibindex, blk_t dib_vmo_offset)
562            : IndirectArgs(op, array, count, bnos, bindex, ib_vmo_offset),
563              ibindex_(ibindex), dib_vmo_offset_(dib_vmo_offset) {}
564
565        // Number of doubly indirect blocks we need to iterate through to touch all |count| direct
566        // blocks.
567        blk_t GetCount() const {
568            return (ibindex_ + count_ + kMinfsDirectPerDindirect - 1) / kMinfsDirectPerDindirect;
569        }
570
571        blk_t GetOffset() const { return dib_vmo_offset_; }
572
573        // Generate parameters for indirect blocks in doubly indirect block |dibindex|, which are
574        // contained in |iarray|
575        IndirectArgs GetIndirect(blk_t* iarray, unsigned dibindex) const;
576
577    protected:
578        const blk_t ibindex_; // relative index of the first indirect block within the first
579                             // doubly indirect block
580        const blk_t dib_vmo_offset_; // index of the first doubly indirect block
581    };
582
583    // Allocate an indirect or doubly indirect block at |offset| within the indirect vmo and clear
584    // the in-memory block array
585    // Assumes that vmo_indirect_ has already been initialized
586    void AllocateIndirect(Transaction* state, blk_t index, IndirectArgs* args);
587
588    // Perform operation |op| on blocks as specified by |params|
589    // The BlockOp methods should not be called directly
590    // All BlockOp methods assume that vmo_indirect_ has been grown to the required size
591    zx_status_t BlockOp(Transaction* state, blk_op_t op, bop_params_t* params);
592    zx_status_t BlockOpDirect(Transaction* state, DirectArgs* params);
593    zx_status_t BlockOpIndirect(Transaction* state, IndirectArgs* params);
594    zx_status_t BlockOpDindirect(Transaction* state, DindirectArgs* params);
595
596    // Get the disk block 'bno' corresponding to the 'n' block
597    // If 'txn' is non-null, new blocks are allocated for all un-allocated bnos.
598    // This can be extended to retrieve multiple contiguous blocks in one call
599    zx_status_t BlockGet(Transaction* state, blk_t n, blk_t* bno);
600    // Deletes all blocks (relative to a file) from "start" (inclusive) to the end
601    // of the file. Does not update mtime/atime.
602    // This can be extended to return indices of deleted bnos, or to delete a specific number of
603    // bnos
604    zx_status_t BlocksShrink(Transaction* state, blk_t start);
605
606    // Update the vnode's inode and write it to disk.
607    void InodeSync(WritebackWork* wb, uint32_t flags);
608
609    // Deletes this Vnode from disk, freeing the inode and blocks.
610    //
611    // Must only be called on Vnodes which
612    // - Have no open fds
613    // - Are fully unlinked (link count == 0)
614    void Purge(WritebackWork* wb);
615
616#ifdef __Fuchsia__
617    zx_status_t GetHandles(uint32_t flags, zx_handle_t* hnd, uint32_t* type,
618                           zxrio_node_info_t* extra) final;
619    void Sync(SyncCallback closure) final;
620    zx_status_t AttachRemote(fs::MountChannel h) final;
621    zx_status_t InitVmo();
622    zx_status_t InitIndirectVmo();
623
624    // Loads indirect blocks up to and including the doubly indirect block at |index|.
625    zx_status_t LoadIndirectWithinDoublyIndirect(uint32_t index);
626
627    // Initializes the indirect VMO, grows it to |size| bytes, and reads |count| indirect
628    // blocks from |iarray| into the indirect VMO, starting at block offset |offset|.
629    zx_status_t LoadIndirectBlocks(blk_t* iarray, uint32_t count, uint32_t offset,
630                                   uint64_t size);
631
632    // Reads the block at |offset| in memory.
633    // Assumes that vmo_indirect_ has already been initialized
634    void ReadIndirectVmoBlock(uint32_t offset, uint32_t** entry);
635
636    // Clears the block at |offset| in memory.
637    // Assumes that vmo_indirect_ has already been initialized
638    void ClearIndirectVmoBlock(uint32_t offset);
639
640    // Use the watcher container to implement a directory watcher
641    void Notify(fbl::StringPiece name, unsigned event) final;
642    zx_status_t WatchDir(fs::Vfs* vfs, uint32_t mask, uint32_t options, zx::channel watcher) final;
643
644    // The vnode is acting as a mount point for a remote filesystem or device.
645    bool IsRemote() const final;
646    zx::channel DetachRemote() final;
647    zx_handle_t GetRemote() const final;
648    void SetRemote(zx::channel remote) final;
649#else  // !__Fuchsia__
650    // Reads the block at |bno| on disk.
651    void ReadIndirectBlock(blk_t bno, uint32_t* entry);
652
653    // Clears the block at |bno| on disk.
654    void ClearIndirectBlock(blk_t bno);
655#endif
656
657#ifdef __Fuchsia__
658    // TODO(smklein): When we have can register MinFS as a pager service, and
659    // it can properly handle pages faults on a vnode's contents, then we can
660    // avoid reading the entire file up-front. Until then, read the contents of
661    // a VMO into memory when it is read/written.
662    zx::vmo vmo_{};
663    uint64_t vmo_size_ = 0;
664
665    // vmo_indirect_ contains all indirect and doubly indirect blocks in the following order:
666    // First kMinfsIndirect blocks                                - initial set of indirect blocks
667    // Next kMinfsDoublyIndirect blocks                           - doubly indirect blocks
668    // Next kMinfsDoublyIndirect * kMinfsDirectPerIndirect blocks - indirect blocks pointed to
669    //                                                              by doubly indirect blocks
670    fbl::unique_ptr<fzl::MappedVmo> vmo_indirect_{};
671
672    vmoid_t vmoid_{};
673    vmoid_t vmoid_indirect_{};
674
675    fs::RemoteContainer remoter_{};
676    fs::WatcherContainer watcher_{};
677#endif
678
679    ino_t ino_{};
680    minfs_inode_t inode_{};
681
682    // This field tracks the current number of file descriptors with
683    // an open reference to this Vnode. Notably, this is distinct from the
684    // VnodeMinfs's own refcount, since there may still be filesystem
685    // work to do after the last file descriptor has been closed.
686    uint32_t fd_count_{};
687};
688
689// Return the block offset in vmo_indirect_ of indirect blocks pointed to by the doubly indirect
690// block at dindex
691constexpr uint32_t GetVmoOffsetForIndirect(uint32_t dibindex) {
692    return kMinfsIndirect + kMinfsDoublyIndirect + (dibindex * kMinfsDirectPerIndirect);
693}
694
695// Return the required vmo size (in bytes) to store indirect blocks pointed to by doubly indirect
696// block dibindex
697constexpr size_t GetVmoSizeForIndirect(uint32_t dibindex) {
698    return GetVmoOffsetForIndirect(dibindex + 1) * kMinfsBlockSize;
699}
700
701// Return the block offset of doubly indirect blocks in vmo_indirect_
702constexpr uint32_t GetVmoOffsetForDoublyIndirect(uint32_t dibindex) {
703    ZX_DEBUG_ASSERT(dibindex < kMinfsDoublyIndirect);
704    return kMinfsIndirect + dibindex;
705}
706
707// Return the required vmo size (in bytes) to store doubly indirect blocks in vmo_indirect_
708constexpr size_t GetVmoSizeForDoublyIndirect() {
709    return (kMinfsIndirect + kMinfsDoublyIndirect) * kMinfsBlockSize;
710}
711
// Tries to calculate the required number of blocks into |num_req_blocks|
// for a write at the given |offset| and |length|.
// Returns a zx_status_t; NOTE(review): the failure conditions are not visible in
// this header — see the definition.
zx_status_t GetRequiredBlockCount(size_t offset, size_t length, uint32_t* num_req_blocks);

// Write the inode data of vnode |vn| to disk. |flags| is a combination of the
// kMxFsSync* constants (the default does not update time values).
void minfs_sync_vnode(fbl::RefPtr<VnodeMinfs> vn, uint32_t flags);
// Debug helpers; presumably print the contents of |info| / |inode| — confirm
// against the definitions.
void minfs_dump_info(const minfs_info_t* info);
void minfs_dump_inode(const minfs_inode_t* inode, ino_t ino);
// Presumably initializes the directory block |bdata| with entries for |ino_self|
// and |ino_parent| — confirm against the definition.
void minfs_dir_init(void* bdata, ino_t ino_self, ino_t ino_parent);

// Given an input bcache, initialize the filesystem and return a reference to the
// root node.
zx_status_t minfs_mount(fbl::unique_ptr<minfs::Bcache> bc, fbl::RefPtr<VnodeMinfs>* root_out);
725
726} // namespace minfs
727