1// Copyright 2016 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <fcntl.h>
6#include <stdio.h>
7#include <stdlib.h>
8#include <string.h>
9#include <time.h>
10#include <unistd.h>
11#include <sys/stat.h>
12
13#include <fbl/algorithm.h>
14#include <fbl/auto_call.h>
15#include <fbl/string_piece.h>
16#include <fs/block-txn.h>
17#include <zircon/device/vfs.h>
18#include <zircon/time.h>
19
20#ifdef __Fuchsia__
21#include <zircon/syscalls.h>
22#include <lib/fdio/vfs.h>
23#include <fbl/auto_lock.h>
24#endif
25
26#include "minfs-private.h"
27
// Debug tracing helper. Uncomment the DEBUG_PRINTF definition below to make
// xprintf() forward its arguments to fprintf(stderr, ...); when DEBUG_PRINTF
// is not defined, xprintf() expands to nothing.
// #define DEBUG_PRINTF
#ifdef DEBUG_PRINTF
#define xprintf(args...) fprintf(stderr, args)
#else
#define xprintf(args...)
#endif
34
35namespace {
36
37zx_time_t minfs_gettime_utc() {
38    // linux/zircon compatible
39    struct timespec ts;
40    clock_gettime(CLOCK_REALTIME, &ts);
41    zx_time_t time = zx_time_add_duration(ZX_SEC(ts.tv_sec), ts.tv_nsec);
42    return time;
43}
44
45} // namespace anonymous
46
47namespace minfs {
48
49void VnodeMinfs::SetIno(ino_t ino) {
50    ZX_DEBUG_ASSERT(ino_ == 0);
51    ino_ = ino;
52}
53
54void VnodeMinfs::InodeSync(WritebackWork* wb, uint32_t flags) {
55    // by default, c/mtimes are not updated to current time
56    if (flags != kMxFsSyncDefault) {
57        zx_time_t cur_time = minfs_gettime_utc();
58        // update times before syncing
59        if ((flags & kMxFsSyncMtime) != 0) {
60            inode_.modify_time = cur_time;
61        }
62        if ((flags & kMxFsSyncCtime) != 0) {
63            inode_.create_time = cur_time;
64        }
65    }
66
67    fs_->InodeUpdate(wb, ino_, &inode_);
68}
69
70// Delete all blocks (relative to a file) from "start" (inclusive) to the end of
71// the file. Does not update mtime/atime.
72zx_status_t VnodeMinfs::BlocksShrink(Transaction* state, blk_t start) {
73    ZX_DEBUG_ASSERT(state != nullptr);
74    bop_params_t boparams(start, static_cast<blk_t>(kMinfsMaxFileBlock - start), nullptr);
75    zx_status_t status;
76    if ((status = BlockOp(state, DELETE, &boparams)) != ZX_OK) {
77        return status;
78    }
79
80#ifdef __Fuchsia__
81    // Arbitrary minimum size for indirect vmo
82    size_t size = (kMinfsIndirect + kMinfsDoublyIndirect) * kMinfsBlockSize;
83    // Number of blocks before dindirect blocks start
84    blk_t pre_dindirect = kMinfsDirect + kMinfsDirectPerIndirect * kMinfsIndirect;
85    if (start > pre_dindirect) {
86        blk_t distart = start - pre_dindirect; //first bno relative to dindirect blocks
87        blk_t last_dindirect = distart / (kMinfsDirectPerDindirect); // index of last dindirect
88
89        // Calculate new size for indirect vmo
90        if (distart % kMinfsDirectPerDindirect) {
91            size = GetVmoSizeForIndirect(last_dindirect);
92        } else if (last_dindirect) {
93            size = GetVmoSizeForIndirect(last_dindirect - 1);
94        }
95    }
96
97    // Shrink the indirect vmo if necessary
98    if (vmo_indirect_ != nullptr && vmo_indirect_->GetSize() > size) {
99        if ((status = vmo_indirect_->Shrink(size)) != ZX_OK) {
100            return status;
101        }
102    }
103#endif
104    return ZX_OK;
105}
106
107#ifdef __Fuchsia__
108zx_status_t VnodeMinfs::LoadIndirectBlocks(blk_t* iarray, uint32_t count, uint32_t offset,
109                                           uint64_t size) {
110    zx_status_t status;
111    if ((status = InitIndirectVmo()) != ZX_OK) {
112        return status;
113    }
114
115    if (vmo_indirect_->GetSize() < size) {
116        zx_status_t status;
117        if ((status = vmo_indirect_->Grow(size)) != ZX_OK) {
118            return status;
119        }
120    }
121
122    fs::ReadTxn txn(fs_->bc_.get());
123
124    for (uint32_t i = 0; i < count; i++) {
125        blk_t ibno;
126        if ((ibno = iarray[i]) != 0) {
127            fs_->ValidateBno(ibno);
128            txn.Enqueue(vmoid_indirect_, offset + i, ibno + fs_->Info().dat_block, 1);
129        }
130    }
131
132    return txn.Transact();
133}
134
135zx_status_t VnodeMinfs::LoadIndirectWithinDoublyIndirect(uint32_t dindex) {
136    uint32_t* dientry;
137
138    size_t size = GetVmoSizeForIndirect(dindex);
139    if (vmo_indirect_->GetSize() >= size) {
140        // We've already loaded this indirect (within dind) block.
141        return ZX_OK;
142    }
143
144    ReadIndirectVmoBlock(GetVmoOffsetForDoublyIndirect(dindex), &dientry);
145    return LoadIndirectBlocks(dientry, kMinfsDirectPerIndirect,
146                              GetVmoOffsetForIndirect(dindex), size);
147}
148
149zx_status_t VnodeMinfs::InitIndirectVmo() {
150    if (vmo_indirect_ != nullptr) {
151        return ZX_OK;
152    }
153
154    zx_status_t status;
155    if ((status = fzl::MappedVmo::Create(kMinfsBlockSize * (kMinfsIndirect + kMinfsDoublyIndirect),
156                                        "minfs-indirect", &vmo_indirect_)) != ZX_OK) {
157        return status;
158    }
159    if ((status = fs_->bc_->AttachVmo(vmo_indirect_->GetVmo(), &vmoid_indirect_)) != ZX_OK) {
160        vmo_indirect_ = nullptr;
161        return status;
162    }
163
164    // Load initial set of indirect blocks
165    if ((status = LoadIndirectBlocks(inode_.inum, kMinfsIndirect, 0, 0)) != ZX_OK) {
166        vmo_indirect_ = nullptr;
167        return status;
168    }
169
170    // Load doubly indirect blocks
171    if ((status = LoadIndirectBlocks(inode_.dinum, kMinfsDoublyIndirect,
172                                     GetVmoOffsetForDoublyIndirect(0),
173                                     GetVmoSizeForDoublyIndirect()) != ZX_OK)) {
174        vmo_indirect_ = nullptr;
175        return status;
176    }
177
178    return ZX_OK;
179}
180
181// Since we cannot yet register the filesystem as a paging service (and cleanly
182// fault on pages when they are actually needed), we currently read an entire
183// file to a VMO when a file's data block are accessed.
184//
185// TODO(smklein): Even this hack can be optimized; a bitmap could be used to
186// track all 'empty/read/dirty' blocks for each vnode, rather than reading
187// the entire file.
188zx_status_t VnodeMinfs::InitVmo() {
189    if (vmo_.is_valid()) {
190        return ZX_OK;
191    }
192
193    zx_status_t status;
194    const size_t vmo_size = fbl::round_up(inode_.size, kMinfsBlockSize);
195    if ((status = zx::vmo::create(vmo_size, 0, &vmo_)) != ZX_OK) {
196        FS_TRACE_ERROR("Failed to initialize vmo; error: %d\n", status);
197        return status;
198    }
199    vmo_size_ = vmo_size;
200
201    zx_object_set_property(vmo_.get(), ZX_PROP_NAME, "minfs-inode", 11);
202
203    if ((status = fs_->bc_->AttachVmo(vmo_.get(), &vmoid_)) != ZX_OK) {
204        vmo_.reset();
205        return status;
206    }
207    fs::ReadTxn txn(fs_->bc_.get());
208    uint32_t dnum_count = 0;
209    uint32_t inum_count = 0;
210    uint32_t dinum_count = 0;
211    fs::Ticker ticker(fs_->StartTicker());
212    auto get_metrics = fbl::MakeAutoCall([&]() {
213        fs_->UpdateInitMetrics(dnum_count, inum_count, dinum_count, vmo_size,
214                               ticker.End());
215    });
216
217    // Initialize all direct blocks
218    blk_t bno;
219    for (uint32_t d = 0; d < kMinfsDirect; d++) {
220        if ((bno = inode_.dnum[d]) != 0) {
221            fs_->ValidateBno(bno);
222            dnum_count++;
223            txn.Enqueue(vmoid_, d, bno + fs_->Info().dat_block, 1);
224        }
225    }
226
227    // Initialize all indirect blocks
228    for (uint32_t i = 0; i < kMinfsIndirect; i++) {
229        blk_t ibno;
230        if ((ibno = inode_.inum[i]) != 0) {
231            fs_->ValidateBno(ibno);
232            inum_count++;
233
234            // Only initialize the indirect vmo if it is being used.
235            if ((status = InitIndirectVmo()) != ZX_OK) {
236                vmo_.reset();
237                return status;
238            }
239
240            uint32_t* ientry;
241            ReadIndirectVmoBlock(i, &ientry);
242
243            for (uint32_t j = 0; j < kMinfsDirectPerIndirect; j++) {
244                if ((bno = ientry[j]) != 0) {
245                    fs_->ValidateBno(bno);
246                    uint32_t n = kMinfsDirect + i * kMinfsDirectPerIndirect + j;
247                    txn.Enqueue(vmoid_, n, bno + fs_->Info().dat_block, 1);
248                }
249            }
250        }
251    }
252
253    // Initialize all doubly indirect blocks
254    for (uint32_t i = 0; i < kMinfsDoublyIndirect; i++) {
255        blk_t dibno;
256
257        if ((dibno = inode_.dinum[i]) != 0) {
258            fs_->ValidateBno(dibno);
259            dinum_count++;
260
261            // Only initialize the doubly indirect vmo if it is being used.
262            if ((status = InitIndirectVmo()) != ZX_OK) {
263                vmo_.reset();
264                return status;
265            }
266
267            uint32_t* dientry;
268            ReadIndirectVmoBlock(GetVmoOffsetForDoublyIndirect(i), &dientry);
269
270            for (uint32_t j = 0; j < kMinfsDirectPerIndirect; j++) {
271                blk_t ibno;
272                if ((ibno = dientry[j]) != 0) {
273                    fs_->ValidateBno(ibno);
274
275                    // Only initialize the indirect vmo if it is being used.
276                    if ((status = LoadIndirectWithinDoublyIndirect(i)) != ZX_OK) {
277                        vmo_.reset();
278                        return status;
279                    }
280
281                    uint32_t* ientry;
282                    ReadIndirectVmoBlock(GetVmoOffsetForIndirect(i) + j, &ientry);
283
284                    for (uint32_t k = 0; k < kMinfsDirectPerIndirect; k++) {
285                        if ((bno = ientry[k]) != 0) {
286                            fs_->ValidateBno(bno);
287                            uint32_t n = kMinfsDirect + kMinfsIndirect * kMinfsDirectPerIndirect
288                                         + j * kMinfsDirectPerIndirect + k;
289                            txn.Enqueue(vmoid_, n, bno + fs_->Info().dat_block, 1);
290                        }
291                    }
292                }
293            }
294        }
295    }
296
297    status = txn.Transact();
298    ValidateVmoTail();
299    return status;
300}
301#endif
302
303void VnodeMinfs::AllocateIndirect(Transaction* state, blk_t index, IndirectArgs* args) {
304    ZX_DEBUG_ASSERT(state != nullptr);
305
306    // *bno must not be already allocated
307    ZX_DEBUG_ASSERT(args->GetBno(index) == 0);
308
309    // allocate new indirect block
310    blk_t bno;
311    fs_->BlockNew(state, &bno);
312
313#ifdef __Fuchsia__
314    ClearIndirectVmoBlock(args->GetOffset() + index);
315#else
316    ClearIndirectBlock(bno);
317#endif
318
319    args->SetBno(index, bno);
320    inode_.block_count++;
321}
322
323zx_status_t VnodeMinfs::BlockOpDirect(Transaction* state, DirectArgs* params) {
324    for (unsigned i = 0; i < params->GetCount(); i++) {
325        blk_t bno = params->GetBno(i);
326        switch (params->GetOp()) {
327            case DELETE: {
328                // If we found a valid block, delete it.
329                if (bno) {
330                    fs_->ValidateBno(bno);
331                    fs_->BlockFree(state->GetWork(), bno);
332                    params->SetBno(i, 0);
333                    inode_.block_count--;
334                }
335                break;
336            }
337            case WRITE: {
338                ZX_DEBUG_ASSERT(state != nullptr);
339                if (bno == 0) {
340                    fs_->BlockNew(state, &bno);
341                    inode_.block_count++;
342                }
343
344                fs_->ValidateBno(bno);
345            }
346            __FALLTHROUGH;
347            case READ: {
348                params->SetBno(i, bno);
349                break;
350            }
351            default: {
352                return ZX_ERR_NOT_SUPPORTED;
353            }
354        }
355    }
356
357    return ZX_OK;
358}
359
360zx_status_t VnodeMinfs::BlockOpIndirect(Transaction* state, IndirectArgs* params) {
361    // we should have initialized vmo before calling this method
362    zx_status_t status;
363
364#ifdef __Fuchsia__
365    if (params->GetOp() == READ || params->GetOp() == WRITE) {
366        validate_vmo_size(vmo_indirect_->GetVmo(), params->GetOffset() + params->GetCount());
367    }
368#endif
369
370    for (unsigned i = 0; i < params->GetCount(); i++) {
371        bool dirty = false;
372        if (params->GetBno(i) == 0) {
373            switch (params->GetOp()) {
374            case DELETE:
375                continue;
376            case READ:
377                return ZX_OK;
378            case WRITE:
379                AllocateIndirect(state, i, params);
380                break;
381            default:
382                return ZX_ERR_NOT_SUPPORTED;
383            }
384
385        }
386
387#ifdef __Fuchsia__
388        blk_t* entry;
389        ReadIndirectVmoBlock(params->GetOffset() + i, &entry);
390#else
391        blk_t entry[kMinfsBlockSize];
392        ReadIndirectBlock(params->GetBno(i), entry);
393#endif
394
395        DirectArgs direct_params = params->GetDirect(entry, i);
396        if ((status = BlockOpDirect(state, &direct_params)) != ZX_OK) {
397            return status;
398        }
399
400        // only update the indirect block if an entry was deleted
401        if (dirty || direct_params.IsDirty()) {
402#ifdef __Fuchsia__
403            state->GetWork()->Enqueue(vmo_indirect_->GetVmo(), params->GetOffset() + i,
404                        params->GetBno(i) + fs_->Info().dat_block, 1);
405#else
406            fs_->bc_->Writeblk(params->GetBno(i) + fs_->Info().dat_block, entry);
407#endif
408            params->SetDirty();
409        }
410
411        // We can delete the current indirect block if all direct blocks within it are deleted
412        if (params->GetOp() == DELETE && direct_params.GetCount() == kMinfsDirectPerIndirect) {
413            // release the direct block itself
414            fs_->BlockFree(state->GetWork(), params->GetBno(i));
415            params->SetBno(i, 0);
416            inode_.block_count--;
417        }
418    }
419
420    return ZX_OK;
421
422}
423
424zx_status_t VnodeMinfs::BlockOpDindirect(Transaction* state, DindirectArgs* params) {
425    zx_status_t status;
426
427#ifdef __Fuchsia__
428    if (params->GetOp() == READ || params->GetOp() == WRITE) {
429        validate_vmo_size(vmo_indirect_->GetVmo(), params->GetOffset() + params->GetCount());
430    }
431#endif
432
433    // operate on doubly indirect blocks
434    for (unsigned i = 0; i < params->GetCount(); i++) {
435        bool dirty = false;
436        if (params->GetBno(i) == 0) {
437            switch (params->GetOp()) {
438            case DELETE:
439                continue;
440            case READ:
441                return ZX_OK;
442            case WRITE:
443                AllocateIndirect(state, i, params);
444                break;
445            default:
446                return ZX_ERR_NOT_SUPPORTED;
447            }
448        }
449
450#ifdef __Fuchsia__
451        uint32_t* dientry;
452        ReadIndirectVmoBlock(GetVmoOffsetForDoublyIndirect(i), &dientry);
453#else
454        uint32_t dientry[kMinfsBlockSize];
455        ReadIndirectBlock(params->GetBno(i), dientry);
456#endif
457
458        // operate on blocks pointed at by the entries in the indirect block
459        IndirectArgs indirect_params = params->GetIndirect(dientry, i);
460        if ((status = BlockOpIndirect(state, &indirect_params)) != ZX_OK) {
461            return status;
462        }
463
464        // only update the indirect block if an entry was deleted
465        if (dirty || indirect_params.IsDirty()) {
466#ifdef __Fuchsia__
467            state->GetWork()->Enqueue(vmo_indirect_->GetVmo(), params->GetOffset() + i,
468                                      params->GetBno(i) + fs_->Info().dat_block, 1);
469#else
470            fs_->bc_->Writeblk(params->GetBno(i) + fs_->Info().dat_block, dientry);
471#endif
472            params->SetDirty();
473        }
474
475        // We can delete the current doubly indirect block if all indirect blocks within it
476        // (and direct blocks within those) are deleted
477        if (params->GetOp() == DELETE && indirect_params.GetCount() == kMinfsDirectPerDindirect) {
478            // release the doubly indirect block itself
479            fs_->BlockFree(state->GetWork(), params->GetBno(i));
480            params->SetBno(i, 0);
481            inode_.block_count--;
482        }
483    }
484
485    return ZX_OK;
486}
487
488#ifdef __Fuchsia__
489void VnodeMinfs::ReadIndirectVmoBlock(uint32_t offset, uint32_t** entry) {
490    ZX_DEBUG_ASSERT(vmo_indirect_ != nullptr);
491    uintptr_t addr = reinterpret_cast<uintptr_t>(vmo_indirect_->GetData());
492    validate_vmo_size(vmo_indirect_->GetVmo(), offset);
493    *entry = reinterpret_cast<uint32_t*>(addr + kMinfsBlockSize * offset);
494}
495
496void VnodeMinfs::ClearIndirectVmoBlock(uint32_t offset) {
497    ZX_DEBUG_ASSERT(vmo_indirect_ != nullptr);
498    uintptr_t addr = reinterpret_cast<uintptr_t>(vmo_indirect_->GetData());
499    validate_vmo_size(vmo_indirect_->GetVmo(), offset);
500    memset(reinterpret_cast<void*>(addr + kMinfsBlockSize * offset), 0, kMinfsBlockSize);
501}
502#else
503void VnodeMinfs::ReadIndirectBlock(blk_t bno, uint32_t* entry) {
504    fs_->bc_->Readblk(bno + fs_->Info().dat_block, entry);
505}
506
507void VnodeMinfs::ClearIndirectBlock(blk_t bno) {
508    uint32_t data[kMinfsBlockSize];
509    memset(data, 0, kMinfsBlockSize);
510    fs_->bc_->Writeblk(bno + fs_->Info().dat_block, data);
511}
512#endif
513
514zx_status_t VnodeMinfs::BlockOp(Transaction* state, blk_op_t op, bop_params_t* boparams) {
515    blk_t start = boparams->start;
516    blk_t found = 0;
517    bool dirty = false;
518    if (found < boparams->count && start < kMinfsDirect) {
519        // array starting with first direct block
520        blk_t* array = &inode_.dnum[start];
521        // number of direct blocks to process
522        blk_t count = fbl::min(boparams->count - found, kMinfsDirect - start);
523        // if bnos exist, adjust past found (should be 0)
524        blk_t* bnos = boparams->bnos == nullptr ? nullptr : &boparams->bnos[found];
525
526        DirectArgs direct_params(op, array, count, bnos);
527        zx_status_t status;
528        if ((status = BlockOpDirect(state, &direct_params)) != ZX_OK) {
529            return status;
530        }
531
532        found += count;
533        dirty |= direct_params.IsDirty();
534    }
535
536    // for indirect blocks, adjust past the direct blocks
537    if (start < kMinfsDirect) {
538        start = 0;
539    } else {
540        start -= kMinfsDirect;
541    }
542
543    if (found < boparams->count && start < kMinfsIndirect * kMinfsDirectPerIndirect) {
544        // index of indirect block, and offset of that block within indirect vmo
545        blk_t ibindex = start / kMinfsDirectPerIndirect;
546        // index of direct block within indirect block
547        blk_t bindex = start % kMinfsDirectPerIndirect;
548
549        // array starting with first indirect block
550        blk_t* array = &inode_.inum[ibindex];
551        // number of direct blocks to process within indirect blocks
552        blk_t count = fbl::min(boparams->count - found,
553                               kMinfsIndirect * kMinfsDirectPerIndirect - start);
554        // if bnos exist, adjust past found
555        blk_t* bnos = boparams->bnos == nullptr ? nullptr : &boparams->bnos[found];
556
557        IndirectArgs indirect_params(op, array, count, bnos, bindex, ibindex);
558        zx_status_t status;
559        if ((status = BlockOpIndirect(state, &indirect_params)) != ZX_OK) {
560            return status;
561        }
562
563        found += count;
564        dirty |= indirect_params.IsDirty();
565    }
566
567    // for doubly indirect blocks, adjust past the indirect blocks
568    if (start < kMinfsIndirect * kMinfsDirectPerIndirect) {
569        start = 0;
570    } else {
571        start -= kMinfsIndirect * kMinfsDirectPerIndirect;
572    }
573
574    if (found < boparams->count &&
575        start < kMinfsDoublyIndirect * kMinfsDirectPerIndirect * kMinfsDirectPerIndirect) {
576        // index of doubly indirect block
577        uint32_t dibindex = start / (kMinfsDirectPerIndirect * kMinfsDirectPerIndirect);
578        ZX_DEBUG_ASSERT(dibindex < kMinfsDoublyIndirect);
579        start -= (dibindex * kMinfsDirectPerIndirect * kMinfsDirectPerIndirect);
580
581        // array starting with first doubly indirect block
582        blk_t* array = &inode_.dinum[dibindex];
583        // number of direct blocks to process within doubly indirect blocks
584        blk_t count = fbl::min(boparams->count - found,
585                kMinfsDoublyIndirect * kMinfsDirectPerIndirect * kMinfsDirectPerIndirect - start);
586        // if bnos exist, adjust past found
587        blk_t* bnos = boparams->bnos == nullptr ? nullptr : &boparams->bnos[found];
588        // index of direct block within indirect block
589        blk_t bindex = start % kMinfsDirectPerIndirect;
590        // offset of indirect block within indirect vmo
591        blk_t ib_vmo_offset = GetVmoOffsetForIndirect(dibindex);
592        // index of indirect block within doubly indirect block
593        blk_t ibindex = start / kMinfsDirectPerIndirect;
594        // offset of doubly indirect block within indirect vmo
595        blk_t dib_vmo_offset = GetVmoOffsetForDoublyIndirect(dibindex);
596
597        DindirectArgs dindirect_params(op, array, count, bnos, bindex, ib_vmo_offset, ibindex,
598                                       dib_vmo_offset);
599        zx_status_t status;
600        if ((status = BlockOpDindirect(state, &dindirect_params)) != ZX_OK) {
601            return status;
602        }
603
604        found += count;
605        dirty |= dindirect_params.IsDirty();
606    }
607
608    if (dirty) {
609        ZX_DEBUG_ASSERT(state != nullptr);
610        InodeSync(state->GetWork(), kMxFsSyncDefault);
611    }
612
613    // Return out of range if we were not able to process all blocks
614    return found == boparams->count ? ZX_OK : ZX_ERR_OUT_OF_RANGE;
615}
616
617zx_status_t VnodeMinfs::BlockGet(Transaction* state, blk_t n, blk_t* bno) {
618#ifdef __Fuchsia__
619    if (n >= kMinfsDirect) {
620        zx_status_t status;
621        // If the vmo_indirect_ vmo has not been created, make it now.
622        if ((status = InitIndirectVmo()) != ZX_OK) {
623            return status;
624        }
625
626        // Number of blocks prior to dindirect blocks
627        blk_t pre_dindirect = kMinfsDirect + kMinfsDirectPerIndirect * kMinfsIndirect;
628        if (n >= pre_dindirect) {
629            // Index of last doubly indirect block
630            blk_t dibindex = (n - pre_dindirect) / kMinfsDirectPerDindirect;
631            ZX_DEBUG_ASSERT(dibindex < kMinfsDoublyIndirect);
632            uint64_t vmo_size = GetVmoSizeForIndirect(dibindex);
633            // Grow VMO if we need more space to fit doubly indirect blocks
634            if (vmo_indirect_->GetSize() < vmo_size) {
635                if ((status = vmo_indirect_->Grow(vmo_size)) != ZX_OK) {
636                    return status;
637                }
638            }
639        }
640    }
641#endif
642
643    bop_params_t boparams(n, 1, bno);
644    return BlockOp(state, state ? WRITE : READ, &boparams);
645}
646
// Values returned by the directory entry callbacks in this file to tell the
// caller iterating over the directory how to proceed.

// Immediately stop iterating over the directory.
#define DIR_CB_DONE 0
// Access the next direntry in the directory. Offsets updated.
#define DIR_CB_NEXT 1
// Identify that the direntry record was modified. Stop iterating.
#define DIR_CB_SAVE_SYNC 2
653
654zx_status_t VnodeMinfs::ReadExactInternal(void* data, size_t len, size_t off) {
655    size_t actual;
656    zx_status_t status = ReadInternal(data, len, off, &actual);
657    if (status != ZX_OK) {
658        return status;
659    } else if (actual != len) {
660        return ZX_ERR_IO;
661    }
662    return ZX_OK;
663}
664
665zx_status_t VnodeMinfs::WriteExactInternal(Transaction* state, const void* data,
666                                           size_t len, size_t off) {
667    size_t actual;
668    zx_status_t status = WriteInternal(state, data, len, off, &actual);
669    if (status != ZX_OK) {
670        return status;
671    } else if (actual != len) {
672        return ZX_ERR_IO;
673    }
674    InodeSync(state->GetWork(), kMxFsSyncMtime);
675    return ZX_OK;
676}
677
678static zx_status_t validate_dirent(minfs_dirent_t* de, size_t bytes_read, size_t off) {
679    uint32_t reclen = static_cast<uint32_t>(MinfsReclen(de, off));
680    if ((bytes_read < MINFS_DIRENT_SIZE) || (reclen < MINFS_DIRENT_SIZE)) {
681        FS_TRACE_ERROR("vn_dir: Could not read dirent at offset: %zd\n", off);
682        return ZX_ERR_IO;
683    } else if ((off + reclen > kMinfsMaxDirectorySize) || (reclen & 3)) {
684        FS_TRACE_ERROR("vn_dir: bad reclen %u > %u\n", reclen, kMinfsMaxDirectorySize);
685        return ZX_ERR_IO;
686    } else if (de->ino != 0) {
687        if ((de->namelen == 0) ||
688            (de->namelen > (reclen - MINFS_DIRENT_SIZE))) {
689            FS_TRACE_ERROR("vn_dir: bad namelen %u / %u\n", de->namelen, reclen);
690            return ZX_ERR_IO;
691        }
692    }
693    return ZX_OK;
694}
695
696// Updates offset information to move to the next direntry in the directory.
697static zx_status_t do_next_dirent(minfs_dirent_t* de, DirectoryOffset* offs) {
698    offs->off_prev = offs->off;
699    offs->off += MinfsReclen(de, offs->off);
700    return DIR_CB_NEXT;
701}
702
703zx_status_t VnodeMinfs::DirentCallbackFind(fbl::RefPtr<VnodeMinfs> vndir, minfs_dirent_t* de,
704                                           DirArgs* args) {
705    if ((de->ino != 0) && fbl::StringPiece(de->name, de->namelen) == args->name) {
706        args->ino = de->ino;
707        args->type = de->type;
708        return DIR_CB_DONE;
709    } else {
710        return do_next_dirent(de, &args->offs);
711    }
712}
713
714zx_status_t VnodeMinfs::CanUnlink() const {
715    // directories must be empty (dirent_count == 2)
716    if (IsDirectory()) {
717        if (inode_.dirent_count != 2) {
718            // if we have more than "." and "..", not empty, cannot unlink
719            return ZX_ERR_NOT_EMPTY;
720#ifdef __Fuchsia__
721        } else if (IsRemote()) {
722            // we cannot unlink mount points
723            return ZX_ERR_UNAVAILABLE;
724#endif
725        }
726    }
727    return ZX_OK;
728}
729
730zx_status_t VnodeMinfs::UnlinkChild(Transaction* state,
731                                    fbl::RefPtr<VnodeMinfs> childvn,
732                                    minfs_dirent_t* de, DirectoryOffset* offs) {
733    // Coalesce the current dirent with the previous/next dirent, if they
734    // (1) exist and (2) are free.
735    size_t off_prev = offs->off_prev;
736    size_t off = offs->off;
737    size_t off_next = off + MinfsReclen(de, off);
738    minfs_dirent_t de_prev, de_next;
739    zx_status_t status;
740
741    // Read the direntries we're considering merging with.
742    // Verify they are free and small enough to merge.
743    size_t coalesced_size = MinfsReclen(de, off);
744    // Coalesce with "next" first, so the kMinfsReclenLast bit can easily flow
745    // back to "de" and "de_prev".
746    if (!(de->reclen & kMinfsReclenLast)) {
747        size_t len = MINFS_DIRENT_SIZE;
748        if ((status = ReadExactInternal(&de_next, len, off_next)) != ZX_OK) {
749            FS_TRACE_ERROR("unlink: Failed to read next dirent\n");
750            return status;
751        } else if ((status = validate_dirent(&de_next, len, off_next)) != ZX_OK) {
752            FS_TRACE_ERROR("unlink: Read invalid dirent\n");
753            return status;
754        }
755        if (de_next.ino == 0) {
756            coalesced_size += MinfsReclen(&de_next, off_next);
757            // If the next entry *was* last, then 'de' is now last.
758            de->reclen |= (de_next.reclen & kMinfsReclenLast);
759        }
760    }
761    if (off_prev != off) {
762        size_t len = MINFS_DIRENT_SIZE;
763        if ((status = ReadExactInternal(&de_prev, len, off_prev)) != ZX_OK) {
764            FS_TRACE_ERROR("unlink: Failed to read previous dirent\n");
765            return status;
766        } else if ((status = validate_dirent(&de_prev, len, off_prev)) != ZX_OK) {
767            FS_TRACE_ERROR("unlink: Read invalid dirent\n");
768            return status;
769        }
770        if (de_prev.ino == 0) {
771            coalesced_size += MinfsReclen(&de_prev, off_prev);
772            off = off_prev;
773        }
774    }
775
776    if (!(de->reclen & kMinfsReclenLast) && (coalesced_size >= kMinfsReclenMask)) {
777        // Should only be possible if the on-disk record format is corrupted
778        FS_TRACE_ERROR("unlink: Corrupted direntry with impossibly large size\n");
779        return ZX_ERR_IO;
780    }
781    de->ino = 0;
782    de->reclen = static_cast<uint32_t>(coalesced_size & kMinfsReclenMask) |
783        (de->reclen & kMinfsReclenLast);
784    // Erase dirent (replace with 'empty' dirent)
785    if ((status = WriteExactInternal(state, de, MINFS_DIRENT_SIZE, off)) != ZX_OK) {
786        return status;
787    }
788
789    if (de->reclen & kMinfsReclenLast) {
790        // Truncating the directory merely removed unused space; if it fails,
791        // the directory contents are still valid.
792        TruncateInternal(state, off + MINFS_DIRENT_SIZE);
793    }
794
795    inode_.dirent_count--;
796
797    if (MinfsMagicType(childvn->inode_.magic) == kMinfsTypeDir) {
798        // Child directory had '..' which pointed to parent directory
799        inode_.link_count--;
800    }
801    childvn->RemoveInodeLink(state->GetWork());
802    state->GetWork()->PinVnode(fbl::move(fbl::WrapRefPtr(this)));
803    state->GetWork()->PinVnode(childvn);
804    return DIR_CB_SAVE_SYNC;
805}
806
807void VnodeMinfs::RemoveInodeLink(WritebackWork* wb) {
808    // This effectively 'unlinks' the target node without deleting the direntry
809    inode_.link_count--;
810    if (MinfsMagicType(inode_.magic) == kMinfsTypeDir) {
811        if (inode_.link_count == 1) {
812            // Directories are initialized with two links, since they point
813            // to themselves via ".". Thus, when they reach "one link", they
814            // are only pointed to by themselves, and should be deleted.
815            inode_.link_count--;
816        }
817    }
818
819    if (fd_count_ == 0 && IsUnlinked()) {
820        Purge(wb);
821    }
822
823    InodeSync(wb, kMxFsSyncMtime);
824}
825
826// caller is expected to prevent unlink of "." or ".."
827zx_status_t VnodeMinfs::DirentCallbackUnlink(fbl::RefPtr<VnodeMinfs> vndir, minfs_dirent_t* de,
828                                             DirArgs* args) {
829    if ((de->ino == 0) || fbl::StringPiece(de->name, de->namelen) != args->name) {
830        return do_next_dirent(de, &args->offs);
831    }
832
833    fbl::RefPtr<VnodeMinfs> vn;
834    zx_status_t status;
835    if ((status = vndir->fs_->VnodeGet(&vn, de->ino)) < 0) {
836        return status;
837    }
838
839    // If a directory was requested, then only try unlinking a directory
840    if ((args->type == kMinfsTypeDir) && !vn->IsDirectory()) {
841        return ZX_ERR_NOT_DIR;
842    }
843    if ((status = vn->CanUnlink()) != ZX_OK) {
844        return status;
845    }
846    return vndir->UnlinkChild(args->state, fbl::move(vn), de, &args->offs);
847}
848
849// same as unlink, but do not validate vnode
850zx_status_t VnodeMinfs::DirentCallbackForceUnlink(fbl::RefPtr<VnodeMinfs> vndir, minfs_dirent_t* de,
851                                                  DirArgs* args) {
852    if ((de->ino == 0) || fbl::StringPiece(de->name, de->namelen) != args->name) {
853        return do_next_dirent(de, &args->offs);
854    }
855
856    fbl::RefPtr<VnodeMinfs> vn;
857    zx_status_t status;
858    if ((status = vndir->fs_->VnodeGet(&vn, de->ino)) < 0) {
859        return status;
860    }
861    return vndir->UnlinkChild(args->state, fbl::move(vn), de, &args->offs);
862}
863
864// Given a (name, inode, type) combination:
865//   - If no corresponding 'name' is found, ZX_ERR_NOT_FOUND is returned
866//   - If the 'name' corresponds to a vnode, check that the target vnode:
867//      - Does not have the same inode as the argument inode
868//      - Is the same type as the argument 'type'
869//      - Is unlinkable
870//   - If the previous checks pass, then:
871//      - Remove the old vnode (decrement link count by one)
872//      - Replace the old vnode's position in the directory with the new inode
873zx_status_t VnodeMinfs::DirentCallbackAttemptRename(fbl::RefPtr<VnodeMinfs> vndir,
874                                                    minfs_dirent_t* de, DirArgs* args) {
875    if ((de->ino == 0) || fbl::StringPiece(de->name, de->namelen) != args->name) {
876        return do_next_dirent(de, &args->offs);
877    }
878
879    fbl::RefPtr<VnodeMinfs> vn;
880    zx_status_t status;
881    if ((status = vndir->fs_->VnodeGet(&vn, de->ino)) < 0) {
882        return status;
883    } else if (args->ino == vn->ino_) {
884        // cannot rename node to itself
885        return ZX_ERR_BAD_STATE;
886    } else if (args->type != de->type) {
887        // cannot rename directory to file (or vice versa)
888        return ZX_ERR_BAD_STATE;
889    } else if ((status = vn->CanUnlink()) != ZX_OK) {
890        // if we cannot unlink the target, we cannot rename the target
891        return status;
892    }
893
894    // If we are renaming ON TOP of a directory, then we can skip
895    // updating the parent link count -- the old directory had a ".." entry to
896    // the parent (link count of 1), but the new directory will ALSO have a ".."
897    // entry, making the rename operation idempotent w.r.t. the parent link
898    // count.
899    vn->RemoveInodeLink(args->state->GetWork());
900
901    de->ino = args->ino;
902    status = vndir->WriteExactInternal(args->state, de, DirentSize(de->namelen), args->offs.off);
903    if (status != ZX_OK) {
904        return status;
905    }
906
907    args->state->GetWork()->PinVnode(vn);
908    args->state->GetWork()->PinVnode(vndir);
909    return DIR_CB_SAVE_SYNC;
910}
911
912zx_status_t VnodeMinfs::DirentCallbackUpdateInode(fbl::RefPtr<VnodeMinfs> vndir, minfs_dirent_t* de,
913                                                  DirArgs* args) {
914    if ((de->ino == 0) || fbl::StringPiece(de->name, de->namelen) != args->name) {
915        return do_next_dirent(de, &args->offs);
916    }
917
918    de->ino = args->ino;
919    zx_status_t status = vndir->WriteExactInternal(args->state, de,
920                                                   DirentSize(de->namelen),
921                                                   args->offs.off);
922    if (status != ZX_OK) {
923        return status;
924    }
925    args->state->GetWork()->PinVnode(vndir);
926    return DIR_CB_SAVE_SYNC;
927}
928
929zx_status_t VnodeMinfs::DirentCallbackFindSpace(fbl::RefPtr<VnodeMinfs> vndir, minfs_dirent_t* de,
930                                                DirArgs* args) {
931    uint32_t reclen = static_cast<uint32_t>(MinfsReclen(de, args->offs.off));
932    if (de->ino == 0) {
933        // empty entry, do we fit?
934        if (args->reclen > reclen) {
935            return do_next_dirent(de, &args->offs);
936        }
937        return DIR_CB_DONE;
938    } else {
939        // filled entry, can we sub-divide?
940        uint32_t size = static_cast<uint32_t>(DirentSize(de->namelen));
941        if (size > reclen) {
942            FS_TRACE_ERROR("bad reclen (smaller than dirent) %u < %u\n", reclen, size);
943            return ZX_ERR_IO;
944        }
945        uint32_t extra = reclen - size;
946        if (extra < args->reclen) {
947            return do_next_dirent(de, &args->offs);
948        }
949        return DIR_CB_DONE;
950    }
951}
952
// Writes a new dirent (args->ino, args->type, args->name) into the record
// located at args->offs.off.
//
// If that record is unused, the new dirent takes it over as-is. If it is in
// use, the existing dirent is shrunk to its minimal size and the new dirent is
// placed in the freed tail, advancing args->offs.off past the shrunk entry.
//
// Returns ZX_ERR_NO_SPACE if args->reclen bytes do not fit in the record,
// ZX_ERR_IO if the on-disk record is smaller than the dirent it holds, or the
// status of any failing read/validate/write. On success the directory's
// dirent count and sequence number are bumped (and its link count too, when
// the new entry is a directory) and the inode is synced with a new mtime.
zx_status_t VnodeMinfs::AppendDirent(DirArgs* args) {
    char data[kMinfsMaxDirentSize];
    minfs_dirent_t* de = reinterpret_cast<minfs_dirent_t*>(data);
    size_t r;
    zx_status_t status = ReadInternal(data, kMinfsMaxDirentSize, args->offs.off, &r);
    if (status != ZX_OK) {
        return status;
    } else if ((status = validate_dirent(de, r, args->offs.off)) != ZX_OK) {
        return status;
    }

    uint32_t reclen = static_cast<uint32_t>(MinfsReclen(de, args->offs.off));
    if (de->ino == 0) {
        // empty entry, do we fit?
        if (args->reclen > reclen) {
            return ZX_ERR_NO_SPACE;
        }
    } else {
        // filled entry, can we sub-divide?
        uint32_t size = static_cast<uint32_t>(DirentSize(de->namelen));
        if (size > reclen) {
            FS_TRACE_ERROR("bad reclen (smaller than dirent) %u < %u\n", reclen, size);
            return ZX_ERR_IO;
        }
        uint32_t extra = reclen - size;
        if (extra < args->reclen) {
            return ZX_ERR_NO_SPACE;
        }
        // shrink existing entry
        // Remember the "last record" flag first: it moves to the new dirent,
        // which now occupies the end of the original record.
        bool was_last_record = de->reclen & kMinfsReclenLast;
        de->reclen = size;
        if ((status = WriteExactInternal(args->state, de,
                                         DirentSize(de->namelen),
                                         args->offs.off)) != ZX_OK) {
            return status;
        }

        args->offs.off += size;
        // Overwrite dirent data to reflect the new dirent.
        // The same buffer is reused; it is fully repopulated below.
        de->reclen = extra | (was_last_record ? kMinfsReclenLast : 0);
    }

    // Fill in the new dirent and write it at the (possibly advanced) offset.
    de->ino = args->ino;
    de->type = static_cast<uint8_t>(args->type);
    de->namelen = static_cast<uint8_t>(args->name.length());
    memcpy(de->name, args->name.data(), de->namelen);
    if ((status = WriteExactInternal(args->state, de, DirentSize(de->namelen),
                                     args->offs.off)) != ZX_OK) {
        return status;
    }

    if (args->type == kMinfsTypeDir) {
        // Child directory has '..' which will point to parent directory
        inode_.link_count++;
    }

    inode_.dirent_count++;
    inode_.seq_num++;
    InodeSync(args->state->GetWork(), kMxFsSyncMtime);
    args->state->GetWork()->PinVnode(fbl::move(fbl::WrapRefPtr(this)));
    return ZX_OK;
}
1015
1016// Calls a callback 'func' on all direntries in a directory 'vn' with the
1017// provided arguments, reacting to the return code of the callback.
1018//
1019// When 'func' is called, it receives a few arguments:
1020//  'vndir': The directory on which the callback is operating
1021//  'de': A pointer the start of a single dirent.
1022//        Only DirentSize(de->namelen) bytes are guaranteed to exist in
1023//        memory from this starting pointer.
1024//  'args': Additional arguments plumbed through ForEachDirent
1025//  'offs': Offset info about where in the directory this direntry is located.
1026//          Since 'func' may create / remove surrounding dirents, it is responsible for
1027//          updating the offset information to access the next dirent.
1028zx_status_t VnodeMinfs::ForEachDirent(DirArgs* args, const DirentCallback func) {
1029    char data[kMinfsMaxDirentSize];
1030    minfs_dirent_t* de = (minfs_dirent_t*) data;
1031    args->offs.off = 0;
1032    args->offs.off_prev = 0;
1033    while (args->offs.off + MINFS_DIRENT_SIZE < kMinfsMaxDirectorySize) {
1034        xprintf("Reading dirent at offset %zd\n", args->offs.off);
1035        size_t r;
1036        zx_status_t status = ReadInternal(data, kMinfsMaxDirentSize, args->offs.off, &r);
1037        if (status != ZX_OK) {
1038            return status;
1039        } else if ((status = validate_dirent(de, r, args->offs.off)) != ZX_OK) {
1040            return status;
1041        }
1042
1043        switch ((status = func(fbl::RefPtr<VnodeMinfs>(this), de, args))) {
1044        case DIR_CB_NEXT:
1045            break;
1046        case DIR_CB_SAVE_SYNC:
1047            inode_.seq_num++;
1048            InodeSync(args->state->GetWork(), kMxFsSyncMtime);
1049            args->state->GetWork()->PinVnode(fbl::move(fbl::WrapRefPtr(this)));
1050            return ZX_OK;
1051        case DIR_CB_DONE:
1052        default:
1053            return status;
1054        }
1055    }
1056
1057    return ZX_ERR_NOT_FOUND;
1058}
1059
1060void VnodeMinfs::fbl_recycle() {
1061    ZX_DEBUG_ASSERT(fd_count_ == 0);
1062    if (!IsUnlinked()) {
1063        // If this node has not been purged already, remove it from the
1064        // hash map. If it has been purged; it will already be absent
1065        // from the map (and may have already been replaced with a new
1066        // node, if the inode has been re-used).
1067        fs_->VnodeRelease(this);
1068    }
1069    delete this;
1070}
1071
1072VnodeMinfs::~VnodeMinfs() {
1073#ifdef __Fuchsia__
1074    // Detach the vmoids from the underlying block device,
1075    // so the underlying VMO may be released.
1076    size_t request_count = 0;
1077    block_fifo_request_t request[2];
1078    if (vmo_.is_valid()) {
1079        request[request_count].group = fs_->bc_->BlockGroupID();
1080        request[request_count].vmoid = vmoid_;
1081        request[request_count].opcode = BLOCKIO_CLOSE_VMO;
1082        request_count++;
1083    }
1084    if (vmo_indirect_ != nullptr) {
1085        request[request_count].group = fs_->bc_->BlockGroupID();
1086        request[request_count].vmoid = vmoid_indirect_;
1087        request[request_count].opcode = BLOCKIO_CLOSE_VMO;
1088        request_count++;
1089    }
1090    if (request_count) {
1091        fs_->bc_->Transaction(&request[0], request_count);
1092    }
1093#endif
1094}
1095
1096zx_status_t VnodeMinfs::ValidateFlags(uint32_t flags) {
1097    xprintf("VnodeMinfs::ValidateFlags(0x%x) vn=%p(#%u)\n", flags, this, ino_);
1098    if ((flags & ZX_FS_FLAG_DIRECTORY) && !IsDirectory()) {
1099        return ZX_ERR_NOT_DIR;
1100    }
1101
1102    if ((flags & ZX_FS_RIGHT_WRITABLE) && IsDirectory()) {
1103        return ZX_ERR_NOT_FILE;
1104    }
1105    return ZX_OK;
1106}
1107
1108#ifdef __Fuchsia__
1109
// Defines a free function template named `<Method>Op` suitable for use as a
// FIDL ops-table entry. The generated function opens a trace duration,
// reinterprets |ctx| as a VnodeMinfs*, and forwards the remaining arguments
// to VnodeMinfs::<Method> (qualified, so the call is not virtually dispatched).
#define ZXFIDL_OPERATION(Method)                                  \
template <typename... Args>                                       \
zx_status_t Method ## Op(void* ctx, Args... args) {               \
    TRACE_DURATION("vfs", #Method);                               \
    auto vn = reinterpret_cast<VnodeMinfs*>(ctx);                 \
    return (vn->VnodeMinfs::Method)(fbl::forward<Args>(args)...); \
}

// Instantiate the forwarding functions for each Minfs-specific FIDL method.
ZXFIDL_OPERATION(GetMetrics)
ZXFIDL_OPERATION(ToggleMetrics)

// Ops table handed to fuchsia_minfs_Minfs_dispatch.
const fuchsia_minfs_Minfs_ops kMinfsOps = {
    .GetMetrics = GetMetricsOp,
    .ToggleMetrics = ToggleMetricsOp,
};
1125
1126// MinfsConnection overrides the base Connection class to allow Minfs to
1127// dispatch its own ordinals.
1128class MinfsConnection : public fs::Connection {
1129public:
1130    MinfsConnection(fs::Vfs* vfs, fbl::RefPtr<fs::Vnode> vnode, zx::channel channel,
1131                    uint32_t flags)
1132            : Connection(vfs, fbl::move(vnode), fbl::move(channel), flags) {}
1133
1134private:
1135    zx_status_t HandleFsSpecificMessage(fidl_msg_t* msg, fidl_txn_t* txn) final {
1136        fidl_message_header_t* hdr = reinterpret_cast<fidl_message_header_t*>(msg->bytes);
1137        if (hdr->ordinal >= fuchsia_minfs_MinfsGetMetricsOrdinal &&
1138            hdr->ordinal <= fuchsia_minfs_MinfsToggleMetricsOrdinal) {
1139            return fuchsia_minfs_Minfs_dispatch(this, txn, msg, &kMinfsOps);
1140        }
1141        zx_handle_close_many(msg->handles, msg->num_handles);
1142        return ZX_ERR_NOT_SUPPORTED;
1143    }
1144};
1145
1146zx_status_t VnodeMinfs::Serve(fs::Vfs* vfs, zx::channel channel, uint32_t flags) {
1147    return vfs->ServeConnection(fbl::make_unique<MinfsConnection>(
1148        vfs, fbl::WrapRefPtr(this), fbl::move(channel), flags));
1149}
1150#endif
1151
1152zx_status_t VnodeMinfs::Open(uint32_t flags, fbl::RefPtr<Vnode>* out_redirect) {
1153    fd_count_++;
1154    return ZX_OK;
1155}
1156
1157void VnodeMinfs::Purge(WritebackWork* wb) {
1158    ZX_DEBUG_ASSERT(fd_count_ == 0);
1159    ZX_DEBUG_ASSERT(IsUnlinked());
1160    fs_->VnodeRelease(this);
1161#ifdef __Fuchsia__
1162    // TODO(smklein): Only init indirect vmo if it's needed
1163    if (InitIndirectVmo() == ZX_OK) {
1164        fs_->InoFree(this, wb);
1165    } else {
1166        fprintf(stderr, "minfs: Failed to Init Indirect VMO while purging %u\n", ino_);
1167    }
1168#else
1169    fs_->InoFree(this, wb);
1170#endif
1171}
1172
1173zx_status_t VnodeMinfs::Close() {
1174    ZX_DEBUG_ASSERT_MSG(fd_count_ > 0, "Closing ino with no fds open");
1175    fd_count_--;
1176
1177    if (fd_count_ == 0 && IsUnlinked()) {
1178        fbl::unique_ptr<Transaction> state;
1179        fs_->BeginTransaction(0, 0, &state);
1180        Purge(state->GetWork());
1181        fs_->CommitTransaction(fbl::move(state));
1182    }
1183    return ZX_OK;
1184}
1185
1186zx_status_t VnodeMinfs::Read(void* data, size_t len, size_t off, size_t* out_actual) {
1187    TRACE_DURATION("minfs", "VnodeMinfs::Read", "ino", ino_, "len", len, "off", off);
1188    ZX_DEBUG_ASSERT_MSG(fd_count_ > 0, "Reading from ino with no fds open");
1189    xprintf("minfs_read() vn=%p(#%u) len=%zd off=%zd\n", this, ino_, len, off);
1190    if (IsDirectory()) {
1191        return ZX_ERR_NOT_FILE;
1192    }
1193
1194    fs::Ticker ticker(fs_->StartTicker());
1195    auto get_metrics = fbl::MakeAutoCall([&ticker, &out_actual, this]() {
1196        fs_->UpdateReadMetrics(*out_actual, ticker.End());
1197    });
1198
1199    zx_status_t status = ReadInternal(data, len, off, out_actual);
1200    if (status != ZX_OK) {
1201        return status;
1202    }
1203    return ZX_OK;
1204}
1205
// Internal read. Usable on directories.
//
// Copies up to |len| bytes starting at byte offset |off| into |data|, clipped
// to the inode's size, and stores the number of bytes copied in |actual|.
// Reads at or beyond the end of the file succeed with *actual == 0.
// NOTE: |actual| is not written on the error paths.
zx_status_t VnodeMinfs::ReadInternal(void* data, size_t len, size_t off, size_t* actual) {
    // clip to EOF
    if (off >= inode_.size) {
        *actual = 0;
        return ZX_OK;
    }
    if (len > (inode_.size - off)) {
        len = inode_.size - off;
    }

    zx_status_t status;
#ifdef __Fuchsia__
    // On Fuchsia the file contents are served straight from the vnode's VMO.
    if ((status = InitVmo()) != ZX_OK) {
        return status;
    } else if ((status = vmo_.read(data, off, len)) != ZX_OK) {
        return status;
    } else {
        *actual = len;
    }
#else
    // Host build: walk the file one block at a time.
    void* start = data;
    uint32_t n = off / kMinfsBlockSize;      // index of the current file block
    size_t adjust = off % kMinfsBlockSize;   // offset within the first block

    while ((len > 0) && (n < kMinfsMaxFileBlock)) {
        // Transfer no more than what remains of the current block.
        size_t xfer;
        if (len > (kMinfsBlockSize - adjust)) {
            xfer = kMinfsBlockSize - adjust;
        } else {
            xfer = len;
        }

        blk_t bno;
        if ((status = BlockGet(nullptr, n, &bno)) != ZX_OK) {
            return status;
        }
        if (bno != 0) {
            char bdata[kMinfsBlockSize];
            if (fs_->ReadDat(bno, bdata)) {
                FS_TRACE_ERROR("minfs: Failed to read data block %u\n", bno);
                return ZX_ERR_IO;
            }
            memcpy(data, bdata + adjust, xfer);
        } else {
            // If the block is not allocated, just read zeros
            memset(data, 0, xfer);
        }

        // Only the first block can start mid-block.
        adjust = 0;
        len -= xfer;
        data = (void*)((uintptr_t)data + xfer);
        n++;
    }
    // Bytes read equals how far the output pointer advanced.
    *actual = (uintptr_t)data - (uintptr_t)start;
#endif
    return ZX_OK;
}
1264
1265zx_status_t VnodeMinfs::Write(const void* data, size_t len, size_t offset,
1266                              size_t* out_actual) {
1267    TRACE_DURATION("minfs", "VnodeMinfs::Write", "ino", ino_, "len", len, "off", offset);
1268    ZX_DEBUG_ASSERT_MSG(fd_count_ > 0, "Writing to ino with no fds open");
1269    xprintf("minfs_write() vn=%p(#%u) len=%zd off=%zd\n", this, ino_, len, offset);
1270    if (IsDirectory()) {
1271        return ZX_ERR_NOT_FILE;
1272    }
1273
1274    *out_actual = 0;
1275    fs::Ticker ticker(fs_->StartTicker());
1276    auto get_metrics = fbl::MakeAutoCall([&ticker, &out_actual, this]() {
1277        fs_->UpdateWriteMetrics(*out_actual, ticker.End());
1278    });
1279
1280    blk_t reserve_blocks;
1281    // Calculate maximum number of blocks to reserve for this write operation.
1282    zx_status_t status = GetRequiredBlockCount(offset, len, &reserve_blocks);
1283    if (status != ZX_OK) {
1284        return status;
1285    }
1286    fbl::unique_ptr<Transaction> state;
1287    if ((status = fs_->BeginTransaction(0, reserve_blocks, &state)) != ZX_OK) {
1288        return status;
1289    }
1290
1291    status = WriteInternal(state.get(), data, len, offset, out_actual);
1292    if (status != ZX_OK) {
1293        return status;
1294    }
1295    if (*out_actual != 0) {
1296        InodeSync(state->GetWork(), kMxFsSyncMtime);  // Successful writes updates mtime
1297        state->GetWork()->PinVnode(fbl::move(fbl::WrapRefPtr(this)));
1298        fs_->CommitTransaction(fbl::move(state));
1299    }
1300    return ZX_OK;
1301}
1302
1303zx_status_t VnodeMinfs::Append(const void* data, size_t len, size_t* out_end,
1304                               size_t* out_actual) {
1305    zx_status_t status = Write(data, len, inode_.size, out_actual);
1306    *out_end = inode_.size;
1307    return status;
1308}
1309
// Internal write. Usable on directories.
//
// Writes up to |len| bytes from |data| at byte offset |off| as part of
// transaction |state|, one block at a time, and stores the number of bytes
// written in |actual|.
//
// A zero-length write succeeds immediately. Otherwise:
//   - If some bytes were written, ZX_OK is returned even when a later block
//     failed (a short write); the inode size grows if the write extended the
//     file.
//   - If no bytes were written, ZX_ERR_FILE_BIG is returned when |off| is at
//     or past kMinfsMaxFileSize, and ZX_ERR_NO_SPACE otherwise. The status of
//     the step that actually failed is not propagated.
zx_status_t VnodeMinfs::WriteInternal(Transaction* state, const void* data,
                                      size_t len, size_t off, size_t* actual) {
    if (len == 0) {
        *actual = 0;
        return ZX_OK;
    }

    zx_status_t status;
#ifdef __Fuchsia__
    if ((status = InitVmo()) != ZX_OK) {
        return status;
    }
#else
    // End offset of the requested write; used to decide whether to zero the
    // tail of the final block.
    size_t max_size = off + len;
#endif
    const void* const start = data;
    uint32_t n = static_cast<uint32_t>(off / kMinfsBlockSize);  // current file block index
    size_t adjust = off % kMinfsBlockSize;                      // offset within the first block

    while ((len > 0) && (n < kMinfsMaxFileBlock)) {
        // Transfer no more than what remains of the current block.
        size_t xfer;
        if (len > (kMinfsBlockSize - adjust)) {
            xfer = kMinfsBlockSize - adjust;
        } else {
            xfer = len;
        }

#ifdef __Fuchsia__
        // Grow the VMO (to a block multiple) if this transfer runs past it.
        size_t xfer_off = n * kMinfsBlockSize + adjust;
        if ((xfer_off + xfer) > vmo_size_) {
            size_t new_size = fbl::round_up(xfer_off + xfer, kMinfsBlockSize);
            ZX_DEBUG_ASSERT(new_size >= inode_.size); // Overflow.
            if ((status = vmo_.set_size(new_size)) != ZX_OK) {
                goto done;
            }
            vmo_size_ = new_size;
        }

        // Update this block of the in-memory VMO
        if ((status = vmo_.write(data, xfer_off, xfer)) != ZX_OK) {
            goto done;
        }

        // Update this block on-disk
        blk_t bno;
        if ((status = BlockGet(state, n, &bno))) {
            goto done;
        }
        ZX_DEBUG_ASSERT(bno != 0);
        // Queue the whole block (from the VMO) for writeback to the data region.
        state->GetWork()->Enqueue(vmo_.get(), n, bno + fs_->Info().dat_block, 1);
#else
        // Host build: read-modify-write the backing block directly.
        blk_t bno;
        if ((status = BlockGet(state, n, &bno))) {
            goto done;
        }
        ZX_DEBUG_ASSERT(bno != 0);
        char wdata[kMinfsBlockSize];
        if (fs_->bc_->Readblk(bno + fs_->Info().dat_block, wdata)) {
            goto done;
        }
        memcpy(wdata + adjust, data, xfer);
        // For the final partial block of a write that reaches or extends past
        // the current end of file, zero everything after the written bytes.
        if (len < kMinfsBlockSize && max_size >= inode_.size) {
            memset(wdata + adjust + xfer, 0, kMinfsBlockSize - (adjust + xfer));
        }
        if (fs_->bc_->Writeblk(bno + fs_->Info().dat_block, wdata)) {
            goto done;
        }
#endif

        // Only the first block can start mid-block.
        adjust = 0;
        len -= xfer;
        data = (void*)((uintptr_t)(data) + xfer);
        n++;
    }

done:
    // From here on |len| holds the number of bytes actually written, derived
    // from how far the input pointer advanced.
    len = (uintptr_t)data - (uintptr_t)start;
    if (len == 0) {
        // If more than zero bytes were requested, but zero bytes were written,
        // return an error explicitly (rather than zero).
        if (off >= kMinfsMaxFileSize) {
            return ZX_ERR_FILE_BIG;
        }

        return ZX_ERR_NO_SPACE;
    }

    // Extend the recorded file size if the write went past the old end.
    if ((off + len) > inode_.size) {
        inode_.size = static_cast<uint32_t>(off + len);
    }

    *actual = len;
    ValidateVmoTail();
    return ZX_OK;
}
1406
1407zx_status_t VnodeMinfs::Lookup(fbl::RefPtr<fs::Vnode>* out, fbl::StringPiece name) {
1408    TRACE_DURATION("minfs", "VnodeMinfs::Lookup", "name", name);
1409    ZX_DEBUG_ASSERT(fs::vfs_valid_name(name));
1410
1411    if (!IsDirectory()) {
1412        FS_TRACE_ERROR("not directory\n");
1413        return ZX_ERR_NOT_SUPPORTED;
1414    }
1415
1416    return LookupInternal(out, name);
1417}
1418
1419zx_status_t VnodeMinfs::LookupInternal(fbl::RefPtr<fs::Vnode>* out, fbl::StringPiece name) {
1420    DirArgs args = DirArgs();
1421    args.name = name;
1422    zx_status_t status;
1423    bool success = false;
1424    fs::Ticker ticker(fs_->StartTicker());
1425    auto get_metrics = fbl::MakeAutoCall([&ticker, &success, this]() {
1426        fs_->UpdateLookupMetrics(success, ticker.End());
1427    });
1428    if ((status = ForEachDirent(&args, DirentCallbackFind)) < 0) {
1429        return status;
1430    }
1431    fbl::RefPtr<VnodeMinfs> vn;
1432    if ((status = fs_->VnodeGet(&vn, args.ino)) < 0) {
1433        return status;
1434    }
1435    *out = fbl::move(vn);
1436    success = (status == ZX_OK);
1437    return status;
1438}
1439
1440zx_status_t VnodeMinfs::Getattr(vnattr_t* a) {
1441    xprintf("minfs_getattr() vn=%p(#%u)\n", this, ino_);
1442    a->mode = DTYPE_TO_VTYPE(MinfsMagicType(inode_.magic)) |
1443            V_IRUSR | V_IWUSR | V_IRGRP | V_IROTH;
1444    a->inode = ino_;
1445    a->size = inode_.size;
1446    a->blksize = kMinfsBlockSize;
1447    a->blkcount = inode_.block_count * (kMinfsBlockSize / VNATTR_BLKSIZE);
1448    a->nlink = inode_.link_count;
1449    a->create_time = inode_.create_time;
1450    a->modify_time = inode_.modify_time;
1451    return ZX_OK;
1452}
1453
1454zx_status_t VnodeMinfs::Setattr(const vnattr_t* a) {
1455    int dirty = 0;
1456    xprintf("minfs_setattr() vn=%p(#%u)\n", this, ino_);
1457    if ((a->valid & ~(ATTR_CTIME|ATTR_MTIME)) != 0) {
1458        return ZX_ERR_NOT_SUPPORTED;
1459    }
1460    if ((a->valid & ATTR_CTIME) != 0) {
1461        inode_.create_time = a->create_time;
1462        dirty = 1;
1463    }
1464    if ((a->valid & ATTR_MTIME) != 0) {
1465        inode_.modify_time = a->modify_time;
1466        dirty = 1;
1467    }
1468    if (dirty) {
1469        // write to disk, but don't overwrite the time
1470        fbl::unique_ptr<Transaction> state;
1471        ZX_ASSERT(fs_->BeginTransaction(0, 0, &state) == ZX_OK);
1472        InodeSync(state->GetWork(), kMxFsSyncDefault);
1473        state->GetWork()->PinVnode(fbl::move(fbl::WrapRefPtr(this)));
1474        fs_->CommitTransaction(fbl::move(state));
1475    }
1476    return ZX_OK;
1477}
1478
// Minfs's view of the opaque fs::vdircookie_t passed to Readdir. It records
// where the previous Readdir call stopped and the directory sequence number
// observed at that time.
typedef struct dircookie {
    size_t off;        // Offset into directory
    uint32_t reserved; // Unused
    uint32_t seqno;    // inode seq no
} dircookie_t;

// Readdir reinterprets the caller's vdircookie_t as a dircookie_t, so the
// latter must fit inside the former.
static_assert(sizeof(dircookie_t) <= sizeof(fs::vdircookie_t),
              "MinFS dircookie too large to fit in IO state");
1487
// Fills |dirents| (capacity |len| bytes) with entries of this directory,
// resuming from the position stored in |cookie|, and stores the number of
// bytes produced in |out_actual|.
//
// Unused records and the ".." entry are not reported. On success the cookie
// is updated with the offset to resume from and the current directory
// sequence number. Returns ZX_ERR_NOT_SUPPORTED if this vnode is not a
// directory. If a dirent cannot be read or fails validation, the cookie
// offset is reset to zero and ZX_ERR_IO is returned.
zx_status_t VnodeMinfs::Readdir(fs::vdircookie_t* cookie, void* dirents, size_t len,
                                size_t* out_actual) {
    TRACE_DURATION("minfs", "VnodeMinfs::Readdir");
    xprintf("minfs_readdir() vn=%p(#%u) cookie=%p len=%zd\n", this, ino_, cookie, len);
    dircookie_t* dc = reinterpret_cast<dircookie_t*>(cookie);
    fs::DirentFiller df(dirents, len);

    if (!IsDirectory()) {
        return ZX_ERR_NOT_SUPPORTED;
    }

    size_t off = dc->off;
    size_t r;
    char data[kMinfsMaxDirentSize];
    minfs_dirent_t* de = (minfs_dirent_t*) data;

    if (off != 0 && dc->seqno != inode_.seq_num) {
        // The offset *might* be invalid, if we called Readdir after a directory
        // has been modified. In this case, we need to re-read the directory
        // until we get to the direntry at or after the previously identified offset.

        size_t off_recovered = 0;
        while (off_recovered < off) {
            if (off_recovered + MINFS_DIRENT_SIZE >= kMinfsMaxDirectorySize) {
                FS_TRACE_ERROR("minfs: Readdir: Corrupt dirent; dirent reclen too large\n");
                goto fail;
            }
            zx_status_t status = ReadInternal(de, kMinfsMaxDirentSize, off_recovered, &r);
            if ((status != ZX_OK) || (validate_dirent(de, r, off_recovered) != ZX_OK)) {
                FS_TRACE_ERROR("minfs: Readdir: Corrupt dirent unreadable/failed validation\n");
                goto fail;
            }
            // Step record by record so that we always land on a record boundary.
            off_recovered += MinfsReclen(de, off_recovered);
        }
        off = off_recovered;
    }

    while (off + MINFS_DIRENT_SIZE < kMinfsMaxDirectorySize) {
        zx_status_t status = ReadInternal(de, kMinfsMaxDirentSize, off, &r);
        if (status != ZX_OK) {
            FS_TRACE_ERROR("minfs: Readdir: Unreadable dirent\n");
            goto fail;
        } else if (validate_dirent(de, r, off) != ZX_OK) {
            FS_TRACE_ERROR("minfs: Readdir: Corrupt dirent failed validation\n");
            goto fail;
        }

        fbl::StringPiece name(de->name, de->namelen);

        // Report only in-use entries, and never "..".
        if (de->ino && name != "..") {
            zx_status_t status;
            if ((status = df.Next(name, de->type, de->ino)) != ZX_OK) {
                // no more space
                // |off| still points at this entry, so the next call retries it.
                goto done;
            }
        }

        off += MinfsReclen(de, off);
    }

done:
    // save our place in the dircookie
    dc->off = off;
    dc->seqno = inode_.seq_num;
    *out_actual = df.BytesFilled();
    ZX_DEBUG_ASSERT(*out_actual <= len); // Otherwise, we're overflowing the input buffer.
    return ZX_OK;

fail:
    // Reset the cookie offset to zero after a failure.
    dc->off = 0;
    return ZX_ERR_IO;
}
1560
// Constructs a vnode that refers to the filesystem instance |fs|.
VnodeMinfs::VnodeMinfs(Minfs* fs) : fs_(fs) {}
1562
1563#ifdef __Fuchsia__
1564void VnodeMinfs::Notify(fbl::StringPiece name, unsigned event) { watcher_.Notify(name, event); }
1565zx_status_t VnodeMinfs::WatchDir(fs::Vfs* vfs, uint32_t mask, uint32_t options,
1566                                 zx::channel watcher) {
1567    return watcher_.WatchDir(vfs, this, mask, options, fbl::move(watcher));
1568}
1569
1570bool VnodeMinfs::IsRemote() const { return remoter_.IsRemote(); }
1571zx::channel VnodeMinfs::DetachRemote() { return remoter_.DetachRemote(); }
1572zx_handle_t VnodeMinfs::GetRemote() const { return remoter_.GetRemote(); }
1573void VnodeMinfs::SetRemote(zx::channel remote) { return remoter_.SetRemote(fbl::move(remote)); }
1574
1575#endif
1576
1577void VnodeMinfs::Allocate(Minfs* fs, uint32_t type, fbl::RefPtr<VnodeMinfs>* out) {
1578    *out = fbl::AdoptRef(new VnodeMinfs(fs));
1579    memset(&(*out)->inode_, 0, sizeof((*out)->inode_));
1580    (*out)->inode_.magic = MinfsMagic(type);
1581    (*out)->inode_.create_time = (*out)->inode_.modify_time = minfs_gettime_utc();
1582    (*out)->inode_.link_count = (type == kMinfsTypeDir ? 2 : 1);
1583}
1584
1585zx_status_t VnodeMinfs::Recreate(Minfs* fs, ino_t ino, fbl::RefPtr<VnodeMinfs>* out) {
1586    fbl::AllocChecker ac;
1587    *out = fbl::AdoptRef(new (&ac) VnodeMinfs(fs));
1588    if (!ac.check()) {
1589        return ZX_ERR_NO_MEMORY;
1590    }
1591    fs->InodeLoad(ino, &(*out)->inode_);
1592    (*out)->ino_ = ino;
1593    return ZX_OK;
1594}
1595
1596zx_status_t VnodeMinfs::Create(fbl::RefPtr<fs::Vnode>* out, fbl::StringPiece name, uint32_t mode) {
1597    TRACE_DURATION("minfs", "VnodeMinfs::Create", "name", name);
1598    ZX_DEBUG_ASSERT(fs::vfs_valid_name(name));
1599
1600    bool success = false;
1601    fs::Ticker ticker(fs_->StartTicker());
1602    auto get_metrics = fbl::MakeAutoCall([&ticker, &success, this]() {
1603        fs_->UpdateCreateMetrics(success, ticker.End());
1604    });
1605
1606    if (!IsDirectory()) {
1607        return ZX_ERR_NOT_SUPPORTED;
1608    }
1609    if (IsUnlinked()) {
1610        return ZX_ERR_BAD_STATE;
1611    }
1612
1613    DirArgs args = DirArgs();
1614    args.name = name;
1615    // ensure file does not exist
1616    zx_status_t status;
1617    if ((status = ForEachDirent(&args, DirentCallbackFind)) != ZX_ERR_NOT_FOUND) {
1618        return ZX_ERR_ALREADY_EXISTS;
1619    }
1620
1621    // creating a directory?
1622    uint32_t type = S_ISDIR(mode) ? kMinfsTypeDir : kMinfsTypeFile;
1623
1624    // Ensure that we have enough space to write the new vnode's direntry
1625    // before updating any other metadata.
1626    args.type = type;
1627    args.reclen = static_cast<uint32_t>(DirentSize(static_cast<uint8_t>(name.length())));
1628    status = ForEachDirent(&args, DirentCallbackFindSpace);
1629    if (status == ZX_ERR_NOT_FOUND) {
1630        return ZX_ERR_NO_SPACE;
1631    } else if (status != ZX_OK) {
1632        return status;
1633    }
1634
1635    // Calculate maximum blocks to reserve for the current directory, based on the size and offset
1636    // of the new direntry (Assuming that the offset is the current size of the directory).
1637    blk_t reserve_blocks = 0;
1638    if ((status = GetRequiredBlockCount(inode_.size, args.reclen, &reserve_blocks)) != ZX_OK) {
1639        return status;
1640    }
1641
1642    // Reserve 1 additional block for the new directory's initial . and .. entries.
1643    reserve_blocks += 1;
1644
1645    // In addition to reserve_blocks, reserve 1 inode for the vnode to be created.
1646    fbl::unique_ptr<Transaction> state;
1647    if ((status = fs_->BeginTransaction(1, reserve_blocks, &state)) != ZX_OK) {
1648        return status;
1649    }
1650
1651    // mint a new inode and vnode for it
1652    fbl::RefPtr<VnodeMinfs> vn;
1653    if ((status = fs_->VnodeNew(state.get(), &vn, type)) < 0) {
1654        return status;
1655    }
1656
1657    // If the new node is a directory, fill it with '.' and '..'.
1658    if (type == kMinfsTypeDir) {
1659        char bdata[DirentSize(1) + DirentSize(2)];
1660        minfs_dir_init(bdata, vn->ino_, ino_);
1661        size_t expected = DirentSize(1) + DirentSize(2);
1662        if ((status = vn->WriteExactInternal(state.get(), bdata, expected, 0)) != ZX_OK) {
1663            FS_TRACE_ERROR("minfs: Create: Failed to initialize empty directory: %d\n", status);
1664            return ZX_ERR_IO;
1665        }
1666        vn->inode_.dirent_count = 2;
1667        vn->InodeSync(state->GetWork(), kMxFsSyncDefault);
1668    }
1669
1670    // add directory entry for the new child node
1671    args.ino = vn->ino_;
1672    args.state = state.get();
1673    if ((status = AppendDirent(&args)) != ZX_OK) {
1674        return status;
1675    }
1676
1677    state->GetWork()->PinVnode(fbl::move(fbl::WrapRefPtr(this)));
1678    state->GetWork()->PinVnode(vn);
1679    fs_->CommitTransaction(fbl::move(state));
1680
1681    vn->fd_count_ = 1;
1682    *out = fbl::move(vn);
1683    success = (status == ZX_OK);
1684    return status;
1685}
1686
1687#ifdef __Fuchsia__
1688
// Filesystem name that QueryFilesystem() copies into the |name| field of the
// info structure it fills in.
constexpr const char kFsName[] = "minfs";
1690
1691zx_status_t VnodeMinfs::QueryFilesystem(fuchsia_io_FilesystemInfo* info) {
1692    static_assert(fbl::constexpr_strlen(kFsName) + 1 < fuchsia_io_MAX_FS_NAME_BUFFER,
1693                  "Minfs name too long");
1694    memset(info, 0, sizeof(*info));
1695    info->block_size = kMinfsBlockSize;
1696    info->max_filename_size = kMinfsMaxNameSize;
1697    info->fs_type = VFS_TYPE_MINFS;
1698    info->fs_id = fs_->GetFsId();
1699    info->total_bytes = fs_->Info().block_count * fs_->Info().block_size;
1700    info->used_bytes = fs_->Info().alloc_block_count * fs_->Info().block_size;
1701    info->total_nodes = fs_->Info().inode_count;
1702    info->used_nodes = fs_->Info().alloc_inode_count;
1703
1704    fvm_info_t fvm_info;
1705    if (fs_->FVMQuery(&fvm_info) == ZX_OK) {
1706        uint64_t free_slices = fvm_info.pslice_total_count - fvm_info.pslice_allocated_count;
1707        info->free_shared_pool_bytes = fvm_info.slice_size * free_slices;
1708    }
1709
1710    strlcpy(reinterpret_cast<char*>(info->name), kFsName, fuchsia_io_MAX_FS_NAME_BUFFER);
1711    return ZX_OK;
1712}
1713
1714zx_status_t VnodeMinfs::GetDevicePath(size_t buffer_len, char* out_name, size_t* out_len) {
1715    return fs_->bc_->GetDevicePath(buffer_len, out_name, out_len);
1716}
1717
1718zx_status_t VnodeMinfs::GetMetrics(fidl_txn_t* txn) {
1719    fuchsia_minfs_Metrics metrics;
1720    zx_status_t status = fs_->GetMetrics(&metrics);
1721    return fuchsia_minfs_MinfsGetMetrics_reply(txn, status, status == ZX_OK ? &metrics : nullptr);
1722}
1723
1724zx_status_t VnodeMinfs::ToggleMetrics(bool enable, fidl_txn_t* txn) {
1725    fs_->SetMetrics(enable);
1726    return fuchsia_minfs_MinfsToggleMetrics_reply(txn, ZX_OK);
1727}
1728
1729#endif
1730
1731zx_status_t VnodeMinfs::Unlink(fbl::StringPiece name, bool must_be_dir) {
1732    TRACE_DURATION("minfs", "VnodeMinfs::Unlink", "name", name);
1733    ZX_DEBUG_ASSERT(fs::vfs_valid_name(name));
1734    bool success = false;
1735    fs::Ticker ticker(fs_->StartTicker());
1736    auto get_metrics = fbl::MakeAutoCall([&ticker, &success, this]() {
1737        fs_->UpdateUnlinkMetrics(success, ticker.End());
1738    });
1739
1740    if (!IsDirectory()) {
1741        return ZX_ERR_NOT_SUPPORTED;
1742    }
1743    zx_status_t status;
1744    fbl::unique_ptr<Transaction> state;
1745    ZX_ASSERT(fs_->BeginTransaction(0, 0, &state) == ZX_OK);
1746    DirArgs args = DirArgs();
1747    args.name = name;
1748    args.type = must_be_dir ? kMinfsTypeDir : 0;
1749    args.state = state.get();
1750    status = ForEachDirent(&args, DirentCallbackUnlink);
1751    if (status == ZX_OK) {
1752        state->GetWork()->PinVnode(fbl::move(fbl::WrapRefPtr(this)));
1753        fs_->CommitTransaction(fbl::move(state));
1754    }
1755    success = (status == ZX_OK);
1756    return status;
1757}
1758
1759zx_status_t VnodeMinfs::Truncate(size_t len) {
1760    TRACE_DURATION("minfs", "VnodeMinfs::Truncate");
1761    if (IsDirectory()) {
1762        return ZX_ERR_NOT_FILE;
1763    }
1764
1765    fs::Ticker ticker(fs_->StartTicker());
1766    auto get_metrics = fbl::MakeAutoCall([&ticker, this] {
1767        fs_->UpdateTruncateMetrics(ticker.End());
1768    });
1769
1770    fbl::unique_ptr<Transaction> state;
1771    // Since we will only edit existing blocks, no new blocks are required.
1772    ZX_ASSERT(fs_->BeginTransaction(0, 0, &state) == ZX_OK);
1773    zx_status_t status = TruncateInternal(state.get(), len);
1774    if (status == ZX_OK) {
1775        // Successful truncates update inode
1776        InodeSync(state->GetWork(), kMxFsSyncMtime);
1777    }
1778    state->GetWork()->PinVnode(fbl::move(fbl::WrapRefPtr(this)));
1779    fs_->CommitTransaction(fbl::move(state));
1780    return status;
1781}
1782
// Sets the in-memory size of this vnode to |len| bytes as part of |state|.
//
// Shrinking (|len| < current size): releases every block that lies entirely
// past |len| through BlocksShrink(), decommits the matching VMO range on
// Fuchsia, and zeroes the tail of the block containing |len| when that block
// is allocated.
// Growing (|len| > current size): rejects sizes above kMinfsMaxFileSize with
// ZX_ERR_INVALID_ARGS and, on Fuchsia, resizes the VMO to |len| rounded up to
// a block boundary. No blocks are allocated in this branch.
// Same size: returns ZX_OK immediately.
//
// This function only updates inode_.size; it does not call InodeSync().
//
// Returns ZX_OK on success; ZX_ERR_IO when VMO initialization, block lookup,
// or reading/writing the final block fails; otherwise the error reported by
// BlocksShrink() or vmo_.set_size().
zx_status_t VnodeMinfs::TruncateInternal(Transaction* state, size_t len) {
    zx_status_t r = 0;
#ifdef __Fuchsia__
    // TODO(smklein): We should only init up to 'len'; no need
    // to read in the portion of a large file we plan on deleting.
    if ((r = InitVmo()) != ZX_OK) {
        FS_TRACE_ERROR("minfs: Truncate failed to initialize VMO: %d\n", r);
        return ZX_ERR_IO;
    }
#endif

    if (len < inode_.size) {
        // Truncate should make the file shorter.
        // NOTE(review): this initial value appears to be unused; |bno| is only
        // read after being handed to BlockGet() below, presumably as an
        // out-parameter.
        blk_t bno = inode_.size / kMinfsBlockSize;

        // Truncate to the nearest block.
        blk_t trunc_bno = static_cast<blk_t>(len / kMinfsBlockSize);
        // [start_bno, EOF) blocks should be deleted entirely.
        // When |len| is not block-aligned, the block containing |len| is kept
        // and deletion starts with the block after it.
        blk_t start_bno = static_cast<blk_t>((len % kMinfsBlockSize == 0) ?
                                             trunc_bno : trunc_bno + 1);
        if ((r = BlocksShrink(state, start_bno)) < 0) {
            return r;
        }

#ifdef __Fuchsia__
        // Decommit the whole-block range between the new and old sizes (both
        // rounded up to a block boundary) in the VMO.
        uint64_t decommit_offset = fbl::round_up(len, kMinfsBlockSize);
        uint64_t decommit_length = fbl::round_up(inode_.size, kMinfsBlockSize) - decommit_offset;
        if (decommit_length > 0) {
            ZX_ASSERT(vmo_.op_range(ZX_VMO_OP_DECOMMIT, decommit_offset,
                                    decommit_length, nullptr, 0) == ZX_OK);
        }
#endif

        // Clamp the recorded size to the end of the last surviving block.
        if (start_bno * kMinfsBlockSize < inode_.size) {
            inode_.size = start_bno * kMinfsBlockSize;
        }

        // Write zeroes to the rest of the remaining block, if it exists
        // After the clamp above, this condition holds only when |len| is not
        // block-aligned.
        if (len < inode_.size) {
            char bdata[kMinfsBlockSize];
            blk_t rel_bno = static_cast<blk_t>(len / kMinfsBlockSize);
            // A null transaction is passed here; presumably this makes the
            // call a pure lookup that does not allocate — TODO confirm.
            if ((r = BlockGet(nullptr, rel_bno, &bno)) != ZX_OK) {
                FS_TRACE_ERROR("minfs: Truncate failed to get block %u of file: %d\n",
                               rel_bno, r);
                return ZX_ERR_IO;
            }
            // A zero |bno| is treated as "no block here": nothing to zero.
            if (bno != 0) {
                // Number of bytes in the final block that remain part of the file.
                size_t adjust = len % kMinfsBlockSize;
#ifdef __Fuchsia__
                // Read the surviving bytes from the VMO, zero the remainder of
                // the block, write the full block back into the VMO, and
                // enqueue it on the transaction's work.
                if ((r = vmo_.read(bdata, len - adjust, adjust)) != ZX_OK) {
                    FS_TRACE_ERROR("minfs: Truncate failed to read last block: %d\n", r);
                    return ZX_ERR_IO;
                }
                memset(bdata + adjust, 0, kMinfsBlockSize - adjust);

                if ((r = vmo_.write(bdata, len - adjust, kMinfsBlockSize)) != ZX_OK) {
                    FS_TRACE_ERROR("minfs: Truncate failed to write last block: %d\n", r);
                    return ZX_ERR_IO;
                }
                state->GetWork()->Enqueue(vmo_.get(), rel_bno, bno + fs_->Info().dat_block, 1);
#else
                // Without a VMO: read the block through fs_->bc_, zero its
                // tail, and write it straight back.
                if (fs_->bc_->Readblk(bno + fs_->Info().dat_block, bdata)) {
                    return ZX_ERR_IO;
                }
                memset(bdata + adjust, 0, kMinfsBlockSize - adjust);
                if (fs_->bc_->Writeblk(bno + fs_->Info().dat_block, bdata)) {
                    return ZX_ERR_IO;
                }
#endif
            }
        }
    } else if (len > inode_.size) {
        // Truncate should make the file longer, filled with zeroes.
        if (kMinfsMaxFileSize < len) {
            return ZX_ERR_INVALID_ARGS;
        }
#ifdef __Fuchsia__
        // Resize the VMO to a whole number of blocks covering |len|.
        uint64_t new_size = fbl::round_up(len, kMinfsBlockSize);
        if ((r = vmo_.set_size(new_size)) != ZX_OK) {
            return r;
        }
        vmo_size_ = new_size;
#endif
    } else {
        // Size is unchanged; nothing to do.
        return ZX_OK;
    }

    // Record the exact requested size (shrink and grow paths both end here).
    inode_.size = static_cast<uint32_t>(len);
    ValidateVmoTail();
    return ZX_OK;
}
1874
1875// Verify that the 'newdir' inode is not a subdirectory of the source.
1876zx_status_t VnodeMinfs::CheckNotSubdirectory(fbl::RefPtr<VnodeMinfs> newdir) {
1877    fbl::RefPtr<VnodeMinfs> vn = newdir;
1878    zx_status_t status = ZX_OK;
1879    while (vn->ino_ != kMinfsRootIno) {
1880        if (vn->ino_ == ino_) {
1881            status = ZX_ERR_INVALID_ARGS;
1882            break;
1883        }
1884
1885        fbl::RefPtr<fs::Vnode> out = nullptr;
1886        if ((status = vn->LookupInternal(&out, "..")) < 0) {
1887            break;
1888        }
1889        vn = fbl::RefPtr<VnodeMinfs>::Downcast(out);
1890    }
1891    return status;
1892}
1893
// Moves the entry |oldname| of this directory to |newname| inside |_newdir|.
//
// Returns:
//   ZX_ERR_NOT_SUPPORTED if this vnode or |_newdir| is not a directory.
//   ZX_ERR_INVALID_ARGS  (from CheckNotSubdirectory) if |_newdir| is the
//                        source vnode itself or lies beneath it.
//   ZX_ERR_NOT_DIR       if either |src_must_be_dir| or |dst_must_be_dir| is
//                        set and the source is not a directory.
//   ZX_ERR_NO_SPACE      if no room for the new direntry is found in |_newdir|.
//   ZX_OK                on success, including the no-op case of renaming an
//                        entry onto itself.
// Other errors from the directory walks, vnode lookup, block reservation or
// transaction setup are propagated unchanged.
//
// The transaction is committed only on the final success path; every earlier
// return leaves it uncommitted. The outcome and elapsed time are reported to
// the rename metrics on every exit path.
zx_status_t VnodeMinfs::Rename(fbl::RefPtr<fs::Vnode> _newdir, fbl::StringPiece oldname,
                               fbl::StringPiece newname, bool src_must_be_dir,
                               bool dst_must_be_dir) {
    TRACE_DURATION("minfs", "VnodeMinfs::Rename", "src", oldname, "dst", newname);
    bool success = false;
    fs::Ticker ticker(fs_->StartTicker());
    auto get_metrics = fbl::MakeAutoCall([&ticker, &success, this](){
        fs_->UpdateRenameMetrics(success, ticker.End());
    });

    auto newdir = fbl::RefPtr<VnodeMinfs>::Downcast(_newdir);
    ZX_DEBUG_ASSERT(fs::vfs_valid_name(oldname));
    ZX_DEBUG_ASSERT(fs::vfs_valid_name(newname));

    // ensure that the vnodes containing oldname and newname are directories
    if (!(IsDirectory() && newdir->IsDirectory())) {
        return ZX_ERR_NOT_SUPPORTED;
    }

    zx_status_t status;
    fbl::RefPtr<VnodeMinfs> oldvn = nullptr;
    // acquire the 'oldname' node (it must exist)
    // Also reject the move if the destination directory is the source itself
    // or one of its descendants.
    DirArgs args = DirArgs();
    args.name = oldname;
    if ((status = ForEachDirent(&args, DirentCallbackFind)) < 0) {
        return status;
    } else if ((status = fs_->VnodeGet(&oldvn, args.ino)) < 0) {
        return status;
    } else if ((status = oldvn->CheckNotSubdirectory(newdir)) < 0) {
        return status;
    }

    // If either the 'src' or 'dst' must be directories, BOTH of them must be directories.
    if (!oldvn->IsDirectory() && (src_must_be_dir || dst_must_be_dir)) {
        return ZX_ERR_NOT_DIR;
    } else if ((newdir->ino_ == ino_) && (oldname == newname)) {
        // Renaming a file or directory to itself?
        // Shortcut success case.
        success = true;
        return ZX_OK;
    }

    // Ensure that we have enough space to write the vnode's new direntry
    // before updating any other metadata.
    args.type = oldvn->IsDirectory() ? kMinfsTypeDir : kMinfsTypeFile;
    args.reclen = static_cast<uint32_t>(DirentSize(static_cast<uint8_t>(newname.length())));

    status = newdir->ForEachDirent(&args, DirentCallbackFindSpace);
    if (status == ZX_ERR_NOT_FOUND) {
        return ZX_ERR_NO_SPACE;
    } else if (status != ZX_OK) {
        return status;
    }

    // Remember where the free slot was found: |args| is reused for further
    // walks below, and this offset is restored before appending.
    DirectoryOffset append_offs = args.offs;

    // Reserve potential blocks to add a new direntry to newdir.
    blk_t reserved_blocks;
    if ((status = GetRequiredBlockCount(newdir->GetInode()->size, args.reclen, &reserved_blocks))
        != ZX_OK) {
        return status;
    }

    // No inodes are reserved; only the blocks computed above.
    fbl::unique_ptr<Transaction> state;
    if ((status = fs_->BeginTransaction(0, reserved_blocks, &state)) != ZX_OK) {
        return status;
    }

    // If the entry for 'newname' exists, make sure it can be replaced by
    // the vnode behind 'oldname'.
    args.state = state.get();
    args.name = newname;
    args.ino = oldvn->ino_;
    status = newdir->ForEachDirent(&args, DirentCallbackAttemptRename);
    if (status == ZX_ERR_NOT_FOUND) {
        // if 'newname' does not exist, create it
        args.offs = append_offs;
        if ((status = newdir->AppendDirent(&args)) != ZX_OK) {
            return status;
        }
    } else if (status != ZX_OK) {
        return status;
    }

    // update the oldvn's entry for '..' if (1) it was a directory, and (2) it
    // moved to a new directory
    if ((args.type == kMinfsTypeDir) && (ino_ != newdir->ino_)) {
        // Look the moved directory up through its new name and point its ".."
        // entry at the destination directory's inode.
        fbl::RefPtr<fs::Vnode> vn_fs;
        if ((status = newdir->Lookup(&vn_fs, newname)) < 0) {
            return status;
        }
        auto vn = fbl::RefPtr<VnodeMinfs>::Downcast(vn_fs);
        args.name = "..";
        args.ino = newdir->ino_;
        if ((status = vn->ForEachDirent(&args, DirentCallbackUpdateInode)) < 0) {
            return status;
        }
    }

    // at this point, the oldvn exists with multiple names (or the same name in
    // different directories)
    // NOTE(review): presumably this increment balances a link-count decrement
    // performed by the forced unlink below — TODO confirm.
    oldvn->inode_.link_count++;

    // finally, remove oldname from its original position
    args.name = oldname;
    if ((status = ForEachDirent(&args, DirentCallbackForceUnlink)) != ZX_OK) {
        return status;
    }
    // Pin the moved vnode and the destination directory on the transaction's
    // work before committing.
    state->GetWork()->PinVnode(oldvn);
    state->GetWork()->PinVnode(newdir);
    fs_->CommitTransaction(fbl::move(state));
    success = true;
    return ZX_OK;
}
2008
2009zx_status_t VnodeMinfs::Link(fbl::StringPiece name, fbl::RefPtr<fs::Vnode> _target) {
2010    TRACE_DURATION("minfs", "VnodeMinfs::Link", "name", name);
2011    ZX_DEBUG_ASSERT(fs::vfs_valid_name(name));
2012
2013    if (!IsDirectory()) {
2014        return ZX_ERR_NOT_SUPPORTED;
2015    } else if (IsUnlinked()) {
2016        return ZX_ERR_BAD_STATE;
2017    }
2018
2019    auto target = fbl::RefPtr<VnodeMinfs>::Downcast(_target);
2020    if (target->IsDirectory()) {
2021        // The target must not be a directory
2022        return ZX_ERR_NOT_FILE;
2023    }
2024
2025    // The destination should not exist
2026    DirArgs args = DirArgs();
2027    args.name = name;
2028    zx_status_t status;
2029    if ((status = ForEachDirent(&args, DirentCallbackFind)) != ZX_ERR_NOT_FOUND) {
2030        return (status == ZX_OK) ? ZX_ERR_ALREADY_EXISTS : status;
2031    }
2032
2033    // Ensure that we have enough space to write the new vnode's direntry
2034    // before updating any other metadata.
2035    args.type = kMinfsTypeFile; // We can't hard link directories
2036    args.reclen = static_cast<uint32_t>(DirentSize(static_cast<uint8_t>(name.length())));
2037    status = ForEachDirent(&args, DirentCallbackFindSpace);
2038    if (status == ZX_ERR_NOT_FOUND) {
2039        return ZX_ERR_NO_SPACE;
2040    } else if (status != ZX_OK) {
2041        return status;
2042    }
2043
2044    // Reserve potential blocks to write a new direntry.
2045    blk_t reserved_blocks;
2046    if ((status = GetRequiredBlockCount(GetInode()->size, args.reclen, &reserved_blocks))
2047        != ZX_OK) {
2048        return status;
2049    }
2050
2051    fbl::unique_ptr<Transaction> state;
2052    if ((status = fs_->BeginTransaction(0, reserved_blocks, &state)) != ZX_OK) {
2053        return status;
2054    }
2055
2056    args.ino = target->ino_;
2057    args.state = state.get();
2058    if ((status = AppendDirent(&args)) != ZX_OK) {
2059        return status;
2060    }
2061
2062    // We have successfully added the vn to a new location. Increment the link count.
2063    target->inode_.link_count++;
2064    target->InodeSync(state->GetWork(), kMxFsSyncDefault);
2065    state->GetWork()->PinVnode(fbl::move(fbl::WrapRefPtr(this)));
2066    state->GetWork()->PinVnode(target);
2067    fs_->CommitTransaction(fbl::move(state));
2068    return ZX_OK;
2069}
2070
2071#ifdef __Fuchsia__
2072zx_status_t VnodeMinfs::GetHandles(uint32_t flags, zx_handle_t* hnd, uint32_t* type,
2073                                   zxrio_node_info_t* extra) {
2074    if (IsDirectory()) {
2075        *type = fuchsia_io_NodeInfoTag_directory;
2076    } else {
2077        *type = fuchsia_io_NodeInfoTag_file;
2078    }
2079    return ZX_OK;
2080}
2081
2082void VnodeMinfs::Sync(SyncCallback closure) {
2083    TRACE_DURATION("minfs", "VnodeMinfs::Sync");
2084    fs_->Sync([this, cb = fbl::move(closure)](zx_status_t status) {
2085        if (status != ZX_OK) {
2086            cb(status);
2087            return;
2088        }
2089        status = fs_->bc_->Sync();
2090        cb(status);
2091    });
2092    return;
2093}
2094
2095zx_status_t VnodeMinfs::AttachRemote(fs::MountChannel h) {
2096    if (kMinfsRootIno == ino_) {
2097        return ZX_ERR_ACCESS_DENIED;
2098    } else if (!IsDirectory() || IsUnlinked()) {
2099        return ZX_ERR_NOT_DIR;
2100    } else if (IsRemote()) {
2101        return ZX_ERR_ALREADY_BOUND;
2102    }
2103    SetRemote(fbl::move(h.TakeChannel()));
2104    return ZX_OK;
2105}
2106#endif
2107
2108VnodeMinfs::DirectArgs VnodeMinfs::IndirectArgs::GetDirect(blk_t* barray, unsigned ibindex) const {
2109    // Determine the starting index for direct blocks within this indirect block
2110    blk_t direct_start = ibindex == 0 ? bindex_ : 0;
2111
2112    // Determine how many direct blocks have already been op'd in indirect block context
2113    blk_t found = 0;
2114
2115    if (ibindex) {
2116        found = kMinfsDirectPerIndirect * ibindex - bindex_;
2117    }
2118
2119    DirectArgs params(op_, // op
2120                      &barray[direct_start], // array
2121                      fbl::min(count_ - found, kMinfsDirectPerIndirect - direct_start), // count
2122                      bnos_ == nullptr ? nullptr : &bnos_[found]); // bnos
2123    return params;
2124}
2125
2126VnodeMinfs::IndirectArgs VnodeMinfs::DindirectArgs::GetIndirect(blk_t* iarray,
2127                                                                unsigned dibindex) const {
2128    // Determine relative starting indices for indirect and direct blocks
2129    uint32_t indirect_start = dibindex == 0 ? ibindex_ : 0;
2130    uint32_t direct_start = (dibindex == 0 && indirect_start == ibindex_) ? bindex_ : 0;
2131
2132    // Determine how many direct blocks we have already op'd within doubly indirect
2133    // context
2134    blk_t found = 0;
2135    if (dibindex) {
2136        found = kMinfsDirectPerIndirect * kMinfsDirectPerIndirect * dibindex -
2137                (ibindex_ * kMinfsDirectPerIndirect) + bindex_;
2138    }
2139
2140    IndirectArgs params(op_, // op
2141                        &iarray[indirect_start], // array
2142                        fbl::min(count_ - found, kMinfsDirectPerDindirect - direct_start), // count
2143                        bnos_ == nullptr ? nullptr : &bnos_[found], // bnos
2144                        direct_start, // bindex
2145                        ib_vmo_offset_ + dibindex + ibindex_ // ib_vmo_offset
2146                        );
2147    return params;
2148}
2149
2150} // namespace minfs
2151