1// Copyright 2017 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <stdbool.h>
6#include <string.h>
7#include <threads.h>
8#include <unistd.h>
9
10#include <ddk/protocol/block.h>
11#include <fbl/array.h>
12#include <fbl/atomic.h>
13#include <fbl/auto_call.h>
14#include <fbl/auto_lock.h>
15#include <fbl/limits.h>
16#include <fbl/new.h>
17#include <lib/fzl/mapped-vmo.h>
18#include <lib/sync/completion.h>
19#include <zircon/compiler.h>
20#include <zircon/device/block.h>
21#include <zircon/syscalls.h>
22#include <zircon/thread_annotations.h>
23#include <lib/zx/vmo.h>
24
25#include "fvm-private.h"
26
27namespace fvm {
28namespace {
29
30zx_status_t FvmLoadThread(void* arg) {
31    return reinterpret_cast<fvm::VPartitionManager*>(arg)->Load();
32}
33
34}
35
36fbl::unique_ptr<SliceExtent> SliceExtent::Split(size_t vslice) {
37    ZX_DEBUG_ASSERT(start() <= vslice);
38    ZX_DEBUG_ASSERT(vslice < end());
39    fbl::AllocChecker ac;
40    fbl::unique_ptr<SliceExtent> new_extent(new (&ac) SliceExtent(vslice + 1));
41    if (!ac.check()) {
42        return nullptr;
43    }
44    new_extent->pslices_.reserve(end() - vslice, &ac);
45    if (!ac.check()) {
46        return nullptr;
47    }
48    for (size_t vs = vslice + 1; vs < end(); vs++) {
49        ZX_ASSERT(new_extent->push_back(get(vs)));
50    }
51    while (!is_empty() && vslice + 1 != end()) {
52        pop_back();
53    }
54    return fbl::move(new_extent);
55}
56
57bool SliceExtent::Merge(const SliceExtent& other) {
58    ZX_DEBUG_ASSERT(end() == other.start());
59    fbl::AllocChecker ac;
60    pslices_.reserve(other.size(), &ac);
61    if (!ac.check()) {
62        return false;
63    }
64
65    for (size_t vs = other.start(); vs < other.end(); vs++) {
66        ZX_ASSERT(push_back(other.get(vs)));
67    }
68    return true;
69}
70
// Constructs the manager for the underlying block device. FVM state
// (metadata mapping, slice size, slice counts) is populated later by Load().
VPartitionManager::VPartitionManager(zx_device_t* parent, const block_info_t& info,
                                     size_t block_op_size, const block_protocol_t* bp)
    : ManagerDeviceType(parent), info_(info), metadata_(nullptr), metadata_size_(0),
      slice_size_(0), pslice_total_count_(0), pslice_allocated_count_(0),
      block_op_size_(block_op_size) {
    // Copy the parent's block protocol (ops + ctx) so block ops can be
    // queued directly against the underlying device.
    memcpy(&bp_, bp, sizeof(*bp));
}
78
79VPartitionManager::~VPartitionManager() = default;
80
81// static
82zx_status_t VPartitionManager::Bind(zx_device_t* dev) {
83    block_info_t block_info;
84    block_protocol_t bp;
85    size_t block_op_size = 0;
86    if (device_get_protocol(dev, ZX_PROTOCOL_BLOCK, &bp) != ZX_OK) {
87        printf("fvm: ERROR: block device '%s': does not support block protocol\n",
88               device_get_name(dev));
89        return ZX_ERR_NOT_SUPPORTED;
90    }
91    bp.ops->query(bp.ctx, &block_info, &block_op_size);
92
93    fbl::AllocChecker ac;
94    auto vpm = fbl::make_unique_checked<VPartitionManager>(&ac, dev, block_info,
95                                                           block_op_size, &bp);
96    if (!ac.check()) {
97        return ZX_ERR_NO_MEMORY;
98    }
99
100    zx_status_t status = vpm->DdkAdd("fvm", DEVICE_ADD_INVISIBLE);
101    if (status != ZX_OK) {
102        return status;
103    }
104
105    // Read vpartition table asynchronously.
106    int rc = thrd_create_with_name(&vpm->initialization_thread_, FvmLoadThread, vpm.get(),
107                                   "fvm-init");
108    if (rc < 0) {
109        vpm->DdkRemove();
110        return ZX_ERR_NO_MEMORY;
111    }
112
113    // The VPartitionManager object is owned by the DDK, now that it has been
114    // added. It will be deleted when the device is released.
115    __UNUSED auto ptr = vpm.release();
116    return ZX_OK;
117}
118
119zx_status_t VPartitionManager::AddPartition(fbl::unique_ptr<VPartition> vp) const {
120    auto ename = reinterpret_cast<const char*>(GetAllocatedVPartEntry(vp->GetEntryIndex())->name);
121    char name[FVM_NAME_LEN + 32];
122    snprintf(name, sizeof(name), "%.*s-p-%zu", FVM_NAME_LEN, ename, vp->GetEntryIndex());
123
124    zx_status_t status;
125    if ((status = vp->DdkAdd(name)) != ZX_OK) {
126        return status;
127    }
128    // TODO(johngro): ask smklein why it is OK to release this managed pointer.
129    __UNUSED auto ptr = vp.release();
130    return ZX_OK;
131}
132
// Shared completion state for a batch of block ops issued by Flush/DoIoLocked.
struct VpmIoCookie {
    // Outstanding transaction count; the op that drops it to zero signals.
    fbl::atomic<size_t> num_txns;
    // Sticky error status: any failing op stores its status here (ZX_OK otherwise).
    fbl::atomic<zx_status_t> status;
    // Signaled once all |num_txns| ops have completed.
    sync_completion_t signal;
};
138
139static void IoCallback(block_op_t* op, zx_status_t status) {
140    VpmIoCookie* c = reinterpret_cast<VpmIoCookie*>(op->cookie);
141    if (status != ZX_OK) {
142        c->status.store(status);
143    }
144    if (c->num_txns.fetch_sub(1) - 1 == 0) {
145        sync_completion_signal(&c->signal);
146    }
147}
148
149zx_status_t VPartitionManager::Flush() const {
150    VpmIoCookie cookie;
151    cookie.num_txns.store(1);
152    cookie.status.store(ZX_OK);
153
154    fbl::AllocChecker ac;
155    fbl::unique_ptr<uint8_t[]> op(new (&ac) uint8_t[block_op_size_]());
156    if (!ac.check()) {
157        return ZX_ERR_NO_MEMORY;
158    }
159
160    block_op_t* bop = reinterpret_cast<block_op_t*>(op.get());
161    bop->command = BLOCKIO_FLUSH;
162    bop->completion_cb = IoCallback;
163    bop->cookie = &cookie;
164    Queue(bop);
165
166    sync_completion_wait(&cookie.signal, ZX_TIME_INFINITE);
167    return static_cast<zx_status_t>(cookie.status.load());
168}
169
// Performs a read or write of |len| bytes of |vmo| at device offset |off|,
// splitting the request into transactions no larger than the device's max
// transfer size and waiting for all of them. Writes are followed by a flush
// so data is durable before returning. |off| and |len| are in bytes and are
// divided by the block size here.
zx_status_t VPartitionManager::DoIoLocked(zx_handle_t vmo, size_t off,
                                          size_t len, uint32_t command) {
    const size_t block_size = info_.block_size;
    // NOTE(review): assumes info_.max_transfer_size is a nonzero multiple of
    // the block size; a zero value would divide by zero below — confirm the
    // underlying block driver guarantees this.
    const size_t max_transfer = info_.max_transfer_size / block_size;
    size_t len_remaining = len / block_size;
    size_t vmo_offset = 0;
    size_t dev_offset = off / block_size;
    // Round up so a partial tail still gets its own transaction.
    const size_t num_data_txns = fbl::round_up(len_remaining, max_transfer) / max_transfer;

    // Add a "FLUSH" operation to write requests.
    const bool flushing = command == BLOCK_OP_WRITE;
    const size_t num_txns = num_data_txns + (flushing ? 1 : 0);

    // One block_op-sized region per transaction; the parent driver dictates
    // block_op_size_, which may exceed sizeof(block_op_t).
    fbl::AllocChecker ac;
    fbl::Array<uint8_t> buffer(new (&ac) uint8_t[block_op_size_ * num_txns],
                               block_op_size_ * num_txns);

    if (!ac.check()) {
        return ZX_ERR_NO_MEMORY;
    }

    VpmIoCookie cookie;
    cookie.num_txns.store(num_txns);
    cookie.status.store(ZX_OK);
    sync_completion_reset(&cookie.signal);

    for (size_t i = 0; i < num_data_txns; i++) {
        size_t length = fbl::min(len_remaining, max_transfer);
        len_remaining -= length;

        block_op_t* bop = reinterpret_cast<block_op_t*>(buffer.get() + (block_op_size_ * i));

        bop->command = command;
        bop->rw.vmo = vmo;
        bop->rw.length = static_cast<uint32_t>(length);
        bop->rw.offset_dev = dev_offset;
        bop->rw.offset_vmo = vmo_offset;
        bop->rw.pages = NULL;
        bop->completion_cb = IoCallback;
        bop->cookie = &cookie;
        // Zero the driver-private tail beyond the public block_op_t fields.
        memset(buffer.get() + (block_op_size_ * i) + sizeof(block_op_t), 0,
               block_op_size_ - sizeof(block_op_t));
        vmo_offset += length;
        dev_offset += length;

        Queue(bop);
    }

    // Trailing flush for writes, counted in num_txns above.
    if (flushing) {
        block_op_t* bop = reinterpret_cast<block_op_t*>(buffer.get() +
                                                        (block_op_size_ * num_data_txns));
        memset(bop, 0, sizeof(*bop));
        bop->command = BLOCKIO_FLUSH;
        bop->completion_cb = IoCallback;
        bop->cookie = &cookie;
        Queue(bop);
    }

    ZX_DEBUG_ASSERT(len_remaining == 0);
    // IoCallback signals once the last outstanding op completes.
    sync_completion_wait(&cookie.signal, ZX_TIME_INFINITE);
    return static_cast<zx_status_t>(cookie.status.load());
}
232
// Reads and validates the FVM metadata from the underlying device, builds
// the in-memory slice maps, creates VPartition devices for every allocated
// entry, and makes the fvm device visible. Runs on the background
// "fvm-init" thread started by Bind(). On any failure before the device is
// made visible, the auto-call below removes the device, detaches this
// thread, and deletes |this|.
zx_status_t VPartitionManager::Load() {
    fbl::AutoLock lock(&lock_);

    auto auto_detach = fbl::MakeAutoCall([&]() TA_NO_THREAD_SAFETY_ANALYSIS {
        fprintf(stderr, "fvm: Aborting Driver Load\n");
        DdkRemove();
        // "Load" is running in a background thread called by bind.
        // This thread will be joined when the fvm_device is released,
        // but it must be added to be released.
        //
        // If we fail to initialize anything before it is added,
        // detach the thread and clean up gracefully.
        thrd_detach(initialization_thread_);
        // Clang's thread analyzer doesn't think we're holding this lock, but
        // we clearly are, and need to release it before deleting the
        // VPartitionManager.
        lock.release();
        delete this;
    });

    zx::vmo vmo;
    if (zx::vmo::create(FVM_BLOCK_SIZE, 0, &vmo) != ZX_OK) {
        return ZX_ERR_INTERNAL;
    }

    // Read the superblock first, to determine the slice size
    if (DoIoLocked(vmo.get(), 0, FVM_BLOCK_SIZE, BLOCK_OP_READ)) {
        fprintf(stderr, "fvm: Failed to read first block from underlying device\n");
        return ZX_ERR_INTERNAL;
    }

    fvm_t sb;
    zx_status_t status = vmo.read(&sb, 0, sizeof(sb));
    if (status != ZX_OK) {
        return ZX_ERR_INTERNAL;
    }

    // Validate the superblock, confirm the slice size
    slice_size_ = sb.slice_size;
    // Multiply-then-divide round trip detects slice_size_ * VSliceMax() overflow.
    if ((slice_size_ * VSliceMax()) / VSliceMax() != slice_size_) {
        fprintf(stderr, "fvm: Slice Size, VSliceMax overflow block address space\n");
        return ZX_ERR_BAD_STATE;
    } else if (info_.block_size == 0 || SliceSize() % info_.block_size) {
        fprintf(stderr, "fvm: Bad block (%u) or slice size (%zu)\n",
                info_.block_size, SliceSize());
        return ZX_ERR_BAD_STATE;
    } else if (sb.vpartition_table_size != kVPartTableLength) {
        fprintf(stderr, "fvm: Bad vpartition table size %zu (expected %zu)\n",
                sb.vpartition_table_size, kVPartTableLength);
        return ZX_ERR_BAD_STATE;
    } else if (sb.allocation_table_size != AllocTableLength(DiskSize(), SliceSize())) {
        fprintf(stderr, "fvm: Bad allocation table size %zu (expected %zu)\n",
                sb.allocation_table_size, AllocTableLength(DiskSize(), SliceSize()));
        return ZX_ERR_BAD_STATE;
    }

    // Cache calculated FVM information.
    metadata_size_ = fvm::MetadataSize(DiskSize(), SliceSize());
    pslice_total_count_ = UsableSlicesCount(DiskSize(), SliceSize());

    // Now that the slice size is known, read the rest of the metadata
    auto make_metadata_vmo = [&](size_t offset, fbl::unique_ptr<fzl::MappedVmo>* out) {
        fbl::unique_ptr<fzl::MappedVmo> mvmo;
        zx_status_t status = fzl::MappedVmo::Create(MetadataSize(), "fvm-meta", &mvmo);
        if (status != ZX_OK) {
            return status;
        }

        // Read both copies of metadata, ensure at least one is valid
        if ((status = DoIoLocked(mvmo->GetVmo(), offset,
                                 MetadataSize(), BLOCK_OP_READ)) != ZX_OK) {
            return status;
        }

        *out = fbl::move(mvmo);
        return ZX_OK;
    };

    fbl::unique_ptr<fzl::MappedVmo> mvmo;
    if ((status = make_metadata_vmo(0, &mvmo)) != ZX_OK) {
        fprintf(stderr, "fvm: Failed to load metadata vmo: %d\n", status);
        return status;
    }
    fbl::unique_ptr<fzl::MappedVmo> mvmo_backup;
    if ((status = make_metadata_vmo(MetadataSize(), &mvmo_backup)) != ZX_OK) {
        fprintf(stderr, "fvm: Failed to load backup metadata vmo: %d\n", status);
        return status;
    }

    // fvm_validate_header picks whichever copy (primary or backup) is valid.
    const void* metadata;
    if ((status = fvm_validate_header(mvmo->GetData(), mvmo_backup->GetData(),
                                      MetadataSize(), &metadata)) != ZX_OK) {
        fprintf(stderr, "fvm: Header validation failure: %d\n", status);
        return status;
    }

    // Keep only the valid copy; its origin decides which on-disk location
    // is treated as primary from here on.
    if (metadata == mvmo->GetData()) {
        first_metadata_is_primary_ = true;
        metadata_ = fbl::move(mvmo);
    } else {
        first_metadata_is_primary_ = false;
        metadata_ = fbl::move(mvmo_backup);
    }

    // Begin initializing the underlying partitions
    DdkMakeVisible();
    auto_detach.cancel();

    // 0th vpartition is invalid
    fbl::unique_ptr<VPartition> vpartitions[FVM_MAX_ENTRIES] = {};

    // Iterate through FVM Entry table, allocating the VPartitions which
    // claim to have slices.
    for (size_t i = 1; i < FVM_MAX_ENTRIES; i++) {
        if (GetVPartEntryLocked(i)->slices == 0) {
            continue;
        } else if ((status = VPartition::Create(this, i, &vpartitions[i])) != ZX_OK) {
            fprintf(stderr, "FVM: Failed to Create vpartition %zu\n", i);
            return status;
        }
    }

    // Iterate through the Slice Allocation table, filling the slice maps
    // of VPartitions.
    for (uint32_t i = 1; i <= GetFvmLocked()->pslice_count; i++) {
        const slice_entry_t* entry = GetSliceEntryLocked(i);
        if (entry->Vpart() == FVM_SLICE_ENTRY_FREE) {
            continue;
        }
        if (vpartitions[entry->Vpart()] == nullptr) {
            continue;
        }

        // It's fine to load the slices while not holding the vpartition
        // lock; no VPartition devices exist yet.
        vpartitions[entry->Vpart()]->SliceSetUnsafe(entry->Vslice(), i);
        pslice_allocated_count_++;
    }

    lock.release();

    // Iterate through 'valid' VPartitions, and create their devices.
    size_t device_count = 0;
    for (size_t i = 0; i < FVM_MAX_ENTRIES; i++) {
        if (vpartitions[i] == nullptr) {
            continue;
        } else if (GetAllocatedVPartEntry(i)->flags & kVPartFlagInactive) {
            // Partitions that were never activated are reclaimed on reboot.
            fprintf(stderr, "FVM: Freeing inactive partition\n");
            FreeSlices(vpartitions[i].get(), 0, VSliceMax());
            continue;
        } else if (AddPartition(fbl::move(vpartitions[i]))) {
            continue;
        }
        device_count++;
    }

    return ZX_OK;
}
391
392zx_status_t VPartitionManager::WriteFvmLocked() {
393    zx_status_t status;
394
395    GetFvmLocked()->generation++;
396    fvm_update_hash(GetFvmLocked(), MetadataSize());
397
398    // If we were reading from the primary, write to the backup.
399    status = DoIoLocked(metadata_->GetVmo(), BackupOffsetLocked(),
400                        MetadataSize(), BLOCK_OP_WRITE);
401    if (status != ZX_OK) {
402        fprintf(stderr, "FVM: Failed to write metadata\n");
403        return status;
404    }
405
406    // We only allow the switch of "write to the other copy of metadata"
407    // once a valid version has been written entirely.
408    first_metadata_is_primary_ = !first_metadata_is_primary_;
409    return ZX_OK;
410}
411
412zx_status_t VPartitionManager::FindFreeVPartEntryLocked(size_t* out) const {
413    for (size_t i = 1; i < FVM_MAX_ENTRIES; i++) {
414        const vpart_entry_t* entry = GetVPartEntryLocked(i);
415        if (entry->slices == 0) {
416            *out = i;
417            return ZX_OK;
418        }
419    }
420    return ZX_ERR_NO_SPACE;
421}
422
423zx_status_t VPartitionManager::FindFreeSliceLocked(size_t* out, size_t hint) const {
424    hint = fbl::max(hint, 1lu);
425    for (size_t i = hint; i <= pslice_total_count_; i++) {
426        if (GetSliceEntryLocked(i)->Vpart() == FVM_SLICE_ENTRY_FREE) {
427            *out = i;
428            return ZX_OK;
429        }
430    }
431    for (size_t i = 1; i < hint; i++) {
432        if (GetSliceEntryLocked(i)->Vpart() == FVM_SLICE_ENTRY_FREE) {
433            *out = i;
434            return ZX_OK;
435        }
436    }
437    return ZX_ERR_NO_SPACE;
438}
439
// Acquires the manager lock and delegates to AllocateSlicesLocked.
zx_status_t VPartitionManager::AllocateSlices(VPartition* vp, size_t vslice_start,
                                              size_t count) {
    fbl::AutoLock lock(&lock_);
    return AllocateSlicesLocked(vp, vslice_start, count);
}
445
446zx_status_t VPartitionManager::AllocateSlicesLocked(VPartition* vp, size_t vslice_start,
447                                                    size_t count) {
448    if (vslice_start + count > VSliceMax()) {
449        return ZX_ERR_INVALID_ARGS;
450    }
451
452    zx_status_t status = ZX_OK;
453    size_t hint = 0;
454
455    {
456        fbl::AutoLock lock(&vp->lock_);
457        if (vp->IsKilledLocked()) {
458            return ZX_ERR_BAD_STATE;
459        }
460        for (size_t i = 0; i < count; i++) {
461            size_t pslice;
462            auto vslice = vslice_start + i;
463            if (vp->SliceGetLocked(vslice) != PSLICE_UNALLOCATED) {
464                status = ZX_ERR_INVALID_ARGS;
465            }
466            if ((status != ZX_OK) ||
467                ((status = FindFreeSliceLocked(&pslice, hint)) != ZX_OK) ||
468                ((status = vp->SliceSetLocked(vslice, static_cast<uint32_t>(pslice)) != ZX_OK))) {
469                for (int j = static_cast<int>(i - 1); j >= 0; j--) {
470                    vslice = vslice_start + j;
471                    FreePhysicalSlice(vp->SliceGetLocked(vslice));
472                    vp->SliceFreeLocked(vslice);
473                }
474
475                return status;
476            }
477            auto vpart = vp->GetEntryIndex();
478            AllocatePhysicalSlice(pslice, vpart, vslice);
479            hint = pslice + 1;
480        }
481    }
482
483    if ((status = WriteFvmLocked()) != ZX_OK) {
484        // Undo allocation in the event of failure; avoid holding VPartition
485        // lock while writing to fvm.
486        fbl::AutoLock lock(&vp->lock_);
487        for (int j = static_cast<int>(count - 1); j >= 0; j--) {
488            auto vslice = vslice_start + j;
489            FreePhysicalSlice(vp->SliceGetLocked(vslice));
490            vp->SliceFreeLocked(vslice);
491        }
492    }
493
494    return status;
495}
496
497zx_status_t VPartitionManager::Upgrade(const uint8_t* old_guid, const uint8_t* new_guid) {
498    fbl::AutoLock lock(&lock_);
499    size_t old_index = 0;
500    size_t new_index = 0;
501
502    if (!memcmp(old_guid, new_guid, GUID_LEN)) {
503        old_guid = nullptr;
504    }
505
506    for (size_t i = 1; i < FVM_MAX_ENTRIES; i++) {
507        auto entry = GetVPartEntryLocked(i);
508        if (entry->slices != 0) {
509            if (old_guid && !(entry->flags & kVPartFlagInactive) &&
510                !memcmp(entry->guid, old_guid, GUID_LEN)) {
511                old_index = i;
512            } else if ((entry->flags & kVPartFlagInactive) &&
513                       !memcmp(entry->guid, new_guid, GUID_LEN)) {
514                new_index = i;
515            }
516        }
517    }
518
519    if (!new_index) {
520        return ZX_ERR_NOT_FOUND;
521    }
522
523    if (old_index) {
524        GetVPartEntryLocked(old_index)->flags |= kVPartFlagInactive;
525    }
526    GetVPartEntryLocked(new_index)->flags &= ~kVPartFlagInactive;
527
528    return WriteFvmLocked();
529}
530
// Acquires the manager lock and delegates to FreeSlicesLocked.
zx_status_t VPartitionManager::FreeSlices(VPartition* vp, size_t vslice_start,
                                          size_t count) {
    fbl::AutoLock lock(&lock_);
    return FreeSlicesLocked(vp, vslice_start, count);
}
536
// Frees |count| vslices of |vp| starting at |vslice_start|, then persists
// the metadata. |vslice_start| == 0 is a special case that destroys the
// whole partition (device removed, entry cleared). Otherwise slices are
// freed individually from the end of the range backwards; slices that are
// not allocated are skipped. Requires lock_ held.
zx_status_t VPartitionManager::FreeSlicesLocked(VPartition* vp, size_t vslice_start,
                                                size_t count) {
    if (vslice_start + count > VSliceMax() || count > VSliceMax()) {
        return ZX_ERR_INVALID_ARGS;
    }

    bool freed_something = false;
    {
        fbl::AutoLock lock(&vp->lock_);
        if (vp->IsKilledLocked())
            return ZX_ERR_BAD_STATE;

        // Sync first, before removing slices, so iotxns in-flight cannot
        // operate on 'unowned' slices.
        zx_status_t status = Flush();
        if (status != ZX_OK) {
            return status;
        }

        if (vslice_start == 0) {
            // Special case: Freeing entire VPartition
            for (auto extent = vp->ExtentBegin(); extent.IsValid(); extent = vp->ExtentBegin()) {
                for (size_t i = extent->start(); i < extent->end(); i++) {
                    FreePhysicalSlice(vp->SliceGetLocked(i));
                }
                vp->ExtentDestroyLocked(extent->start());
            }

            // Remove device, VPartition if this was a request to free all slices.
            vp->DdkRemove();
            auto entry = GetVPartEntryLocked(vp->GetEntryIndex());
            entry->clear();
            vp->KillLocked();
            freed_something = true;
        } else {
            // Free from the end backwards so an early failure leaves a
            // contiguous allocated prefix.
            for (int i = static_cast<int>(count - 1); i >= 0; i--) {
                auto vslice = vslice_start + i;
                if (vp->SliceCanFree(vslice)) {
                    size_t pslice = vp->SliceGetLocked(vslice);
                    if (!freed_something) {
                        // The first 'free' is the only one which can fail -- it
                        // has the potential to split extents, which may require
                        // memory allocation.
                        if (!vp->SliceFreeLocked(vslice)) {
                            return ZX_ERR_NO_MEMORY;
                        }
                    } else {
                        ZX_ASSERT(vp->SliceFreeLocked(vslice));
                    }
                    FreePhysicalSlice(pslice);
                    freed_something = true;
                }
            }
        }
    }

    // Nothing in the requested range was allocated.
    if (!freed_something) {
        return ZX_ERR_INVALID_ARGS;
    }
    return WriteFvmLocked();
}
598
599void VPartitionManager::Query(fvm_info_t* info) {
600    info->slice_size = SliceSize();
601    info->vslice_count = VSliceMax();
602    {
603        fbl::AutoLock lock(&lock_);
604        info->pslice_total_count = pslice_total_count_;
605        info->pslice_allocated_count = pslice_allocated_count_;
606    }
607}
608
609void VPartitionManager::FreePhysicalSlice(size_t pslice) {
610    auto entry = GetSliceEntryLocked(pslice);
611    ZX_DEBUG_ASSERT_MSG(entry->Vpart() != FVM_SLICE_ENTRY_FREE, "Freeing already-free slice");
612    entry->SetVpart(FVM_SLICE_ENTRY_FREE);
613    pslice_allocated_count_--;
614}
615
616void VPartitionManager::AllocatePhysicalSlice(size_t pslice, uint64_t vpart, uint64_t vslice) {
617    ZX_DEBUG_ASSERT(vpart <= VPART_MAX);
618    ZX_DEBUG_ASSERT(vslice <= VSLICE_MAX);
619    auto entry = GetSliceEntryLocked(pslice);
620    ZX_DEBUG_ASSERT_MSG(entry->Vpart() == FVM_SLICE_ENTRY_FREE,
621                        "Allocating previously allocated slice");
622    entry->SetVpart(vpart);
623    entry->SetVslice(vslice);
624    pslice_allocated_count_++;
625}
626
627slice_entry_t* VPartitionManager::GetSliceEntryLocked(size_t index) const {
628    ZX_DEBUG_ASSERT(index >= 1);
629    uintptr_t metadata_start = reinterpret_cast<uintptr_t>(GetFvmLocked());
630    uintptr_t offset = static_cast<uintptr_t>(kAllocTableOffset +
631                                              index * sizeof(slice_entry_t));
632    ZX_DEBUG_ASSERT(kAllocTableOffset <= offset);
633    ZX_DEBUG_ASSERT(offset < kAllocTableOffset + AllocTableLength(DiskSize(), SliceSize()));
634    return reinterpret_cast<slice_entry_t*>(metadata_start + offset);
635}
636
637vpart_entry_t* VPartitionManager::GetVPartEntryLocked(size_t index) const {
638    ZX_DEBUG_ASSERT(index >= 1);
639    uintptr_t metadata_start = reinterpret_cast<uintptr_t>(GetFvmLocked());
640    uintptr_t offset = static_cast<uintptr_t>(kVPartTableOffset +
641                                              index * sizeof(vpart_entry_t));
642    ZX_DEBUG_ASSERT(kVPartTableOffset <= offset);
643    ZX_DEBUG_ASSERT(offset < kVPartTableOffset + kVPartTableLength);
644    return reinterpret_cast<vpart_entry_t*>(metadata_start + offset);
645}
646
647// Device protocol (FVM)
648
// Device ioctl handler for the FVM manager: supports allocating a new
// vpartition, querying FVM geometry, and upgrading (activating) a partition.
zx_status_t VPartitionManager::DdkIoctl(uint32_t op, const void* cmd,
                                        size_t cmdlen, void* reply, size_t max,
                                        size_t* out_actual) {
    switch (op) {
    case IOCTL_BLOCK_FVM_ALLOC_PARTITION: {
        if (cmdlen < sizeof(alloc_req_t))
            return ZX_ERR_BUFFER_TOO_SMALL;
        const alloc_req_t* request = static_cast<const alloc_req_t*>(cmd);

        // Slice count must fit in the table's uint32_t field and be nonzero.
        if (request->slice_count >= fbl::numeric_limits<uint32_t>::max()) {
            return ZX_ERR_OUT_OF_RANGE;
        } else if (request->slice_count == 0) {
            return ZX_ERR_OUT_OF_RANGE;
        }

        zx_status_t status;
        fbl::unique_ptr<VPartition> vpart;
        {
            fbl::AutoLock lock(&lock_);
            size_t vpart_entry;
            if ((status = FindFreeVPartEntryLocked(&vpart_entry)) != ZX_OK) {
                return status;
            }

            if ((status = VPartition::Create(this, vpart_entry, &vpart)) != ZX_OK) {
                return status;
            }

            auto entry = GetVPartEntryLocked(vpart_entry);
            entry->init(request->type, request->guid,
                        static_cast<uint32_t>(request->slice_count),
                        request->name, request->flags & kVPartAllocateMask);

            if ((status = AllocateSlicesLocked(vpart.get(), 0,
                                               request->slice_count)) != ZX_OK) {
                entry->slices = 0; // Undo VPartition allocation
                return status;
            }
        }
        // NOTE(review): if AddPartition fails, the table entry and its slices
        // remain allocated even though no device was published — confirm
        // whether that leak is intentional.
        if ((status = AddPartition(fbl::move(vpart))) != ZX_OK) {
            return status;
        }
        return ZX_OK;
    }
    case IOCTL_BLOCK_FVM_QUERY: {
        if (max < sizeof(fvm_info_t)) {
            return ZX_ERR_BUFFER_TOO_SMALL;
        }
        fvm_info_t* info = static_cast<fvm_info_t*>(reply);
        Query(info);
        *out_actual = sizeof(fvm_info_t);
        return ZX_OK;
    }
    case IOCTL_BLOCK_FVM_UPGRADE: {
        if (cmdlen < sizeof(upgrade_req_t)) {
            return ZX_ERR_BUFFER_TOO_SMALL;
        }
        const upgrade_req_t* req = static_cast<const upgrade_req_t*>(cmd);
        return Upgrade(req->old_guid, req->new_guid);
    }
    default:
        return ZX_ERR_NOT_SUPPORTED;
    }

    // Unreachable: every case above returns.
    return ZX_ERR_NOT_SUPPORTED;
}
715
// Device protocol: on unbind, schedule removal of the fvm device.
void VPartitionManager::DdkUnbind() {
    DdkRemove();
}
719
// Device protocol: final release. Joins the initialization thread started
// in Bind() before destroying the manager.
void VPartitionManager::DdkRelease() {
    thrd_join(initialization_thread_, nullptr);
    delete this;
}
724
// Constructs a VPartition bound to vpartition-table entry |entry_index|.
// NOTE(review): |block_op_size| is unused in this visible body — presumably
// kept for signature symmetry with the manager; confirm.
VPartition::VPartition(VPartitionManager* vpm, size_t entry_index, size_t block_op_size)
    : PartitionDeviceType(vpm->zxdev()), mgr_(vpm), entry_index_(entry_index) {

    // Start with the parent device's geometry but zero blocks; the count
    // grows as slices are mapped in.
    memcpy(&info_, &mgr_->Info(), sizeof(block_info_t));
    info_.block_count = 0;
}
731
732VPartition::~VPartition() = default;
733
734zx_status_t VPartition::Create(VPartitionManager* vpm, size_t entry_index,
735                               fbl::unique_ptr<VPartition>* out) {
736    ZX_DEBUG_ASSERT(entry_index != 0);
737
738    fbl::AllocChecker ac;
739    auto vp = fbl::make_unique_checked<VPartition>(&ac, vpm, entry_index, vpm->BlockOpSize());
740    if (!ac.check()) {
741        return ZX_ERR_NO_MEMORY;
742    }
743
744    *out = fbl::move(vp);
745    return ZX_OK;
746}
747
748uint32_t VPartition::SliceGetLocked(size_t vslice) const {
749    ZX_DEBUG_ASSERT(vslice < mgr_->VSliceMax());
750    auto extent = --slice_map_.upper_bound(vslice);
751    if (!extent.IsValid()) {
752        return PSLICE_UNALLOCATED;
753    }
754    ZX_DEBUG_ASSERT(extent->start() <= vslice);
755    return extent->get(vslice);
756}
757
758zx_status_t VPartition::CheckSlices(size_t vslice_start, size_t* count, bool* allocated) {
759    fbl::AutoLock lock(&lock_);
760
761    if (vslice_start >= mgr_->VSliceMax()) {
762        return ZX_ERR_OUT_OF_RANGE;
763    }
764
765    if (IsKilledLocked()) {
766        return ZX_ERR_BAD_STATE;
767    }
768
769    *count = 0;
770    *allocated = false;
771
772    auto extent = --slice_map_.upper_bound(vslice_start);
773    if (extent.IsValid()) {
774        ZX_DEBUG_ASSERT(extent->start() <= vslice_start);
775        if (extent->start() + extent->size() > vslice_start) {
776            *count = extent->size() - (vslice_start - extent->start());
777            *allocated = true;
778        }
779    }
780
781    if (!(*allocated)) {
782        auto extent = slice_map_.upper_bound(vslice_start);
783        if (extent.IsValid()) {
784            ZX_DEBUG_ASSERT(extent->start() > vslice_start);
785            *count = extent->start() - vslice_start;
786        } else {
787            *count = mgr_->VSliceMax() - vslice_start;
788        }
789    }
790
791    return ZX_OK;
792}
793
// Maps |vslice| -> |pslice| in the slice map, appending to an adjacent
// extent when possible, creating a new extent otherwise, and merging with
// the following extent if the mapping makes them contiguous. Also grows the
// reported block count by one slice's worth of blocks.
zx_status_t VPartition::SliceSetLocked(size_t vslice, uint32_t pslice) {
    ZX_DEBUG_ASSERT(vslice < mgr_->VSliceMax());
    auto extent = --slice_map_.upper_bound(vslice);
    ZX_DEBUG_ASSERT(!extent.IsValid() || extent->get(vslice) == PSLICE_UNALLOCATED);
    if (extent.IsValid() && (vslice == extent->end())) {
        // Easy case: append to existing slice
        if (!extent->push_back(pslice)) {
            return ZX_ERR_NO_MEMORY;
        }
    } else {
        // Longer case: there is no extent for this vslice, so we should make
        // one.
        fbl::AllocChecker ac;
        fbl::unique_ptr<SliceExtent> new_extent(new (&ac) SliceExtent(vslice));
        if (!ac.check()) {
            return ZX_ERR_NO_MEMORY;
        } else if (!new_extent->push_back(pslice)) {
            return ZX_ERR_NO_MEMORY;
        }
        ZX_DEBUG_ASSERT(new_extent->GetKey() == vslice);
        ZX_DEBUG_ASSERT(new_extent->get(vslice) == pslice);
        slice_map_.insert(fbl::move(new_extent));
        // Re-acquire the iterator now that it points at the inserted extent.
        extent = --slice_map_.upper_bound(vslice);
    }

    ZX_DEBUG_ASSERT(SliceGetLocked(vslice) == pslice);
    AddBlocksLocked((mgr_->SliceSize() / info_.block_size));

    // Merge with the next contiguous extent (if any)
    auto nextExtent = slice_map_.upper_bound(vslice);
    if (nextExtent.IsValid() && (vslice + 1 == nextExtent->start())) {
        // Merge may fail on allocation; in that case the two extents simply
        // remain separate, which is still a valid map.
        if (extent->Merge(*nextExtent)) {
            slice_map_.erase(*nextExtent);
        }
    }

    return ZX_OK;
}
832
// Unmaps |vslice| from the slice map. Freeing the middle of an extent
// splits it in two (which can fail on allocation — returns false, map
// unchanged); freeing the tail just pops it. Shrinks the reported block
// count by one slice's worth of blocks on success.
bool VPartition::SliceFreeLocked(size_t vslice) {
    ZX_DEBUG_ASSERT(vslice < mgr_->VSliceMax());
    ZX_DEBUG_ASSERT(SliceCanFree(vslice));
    auto extent = --slice_map_.upper_bound(vslice);
    if (vslice != extent->end() - 1) {
        // Removing from the middle of an extent; this splits the extent in
        // two.
        auto new_extent = extent->Split(vslice);
        if (new_extent == nullptr) {
            return false;
        }
        slice_map_.insert(fbl::move(new_extent));
    }
    // Removing from end of extent
    extent->pop_back();
    if (extent->is_empty()) {
        slice_map_.erase(*extent);
    }

    AddBlocksLocked(-(mgr_->SliceSize() / info_.block_size));
    return true;
}
855
856void VPartition::ExtentDestroyLocked(size_t vslice) TA_REQ(lock_) {
857    ZX_DEBUG_ASSERT(vslice < mgr_->VSliceMax());
858    ZX_DEBUG_ASSERT(SliceCanFree(vslice));
859    auto extent = --slice_map_.upper_bound(vslice);
860    size_t length = extent->size();
861    slice_map_.erase(*extent);
862    AddBlocksLocked(-((length * mgr_->SliceSize()) / info_.block_size));
863}
864
865static zx_status_t RequestBoundCheck(const extend_request_t* request,
866                                     size_t vslice_max) {
867    if (request->offset == 0 || request->offset > vslice_max) {
868        return ZX_ERR_OUT_OF_RANGE;
869    } else if (request->length > vslice_max) {
870        return ZX_ERR_OUT_OF_RANGE;
871    } else if (request->offset + request->length < request->offset ||
872               request->offset + request->length > vslice_max) {
873        return ZX_ERR_OUT_OF_RANGE;
874    }
875    return ZX_OK;
876}
877
878// Device protocol (VPartition)
879
// Handles block-device and FVM-specific ioctls for this virtual partition.
// |cmd|/|cmdlen| describe the input buffer, |reply|/|max| the output buffer;
// where an ioctl produces output, the number of bytes written is returned
// through |out_actual|. Buffer sizes are validated before any state is read.
// Ops that read per-partition metadata take |lock_| and fail with
// ZX_ERR_BAD_STATE once the partition has been killed.
zx_status_t VPartition::DdkIoctl(uint32_t op, const void* cmd, size_t cmdlen,
                                 void* reply, size_t max, size_t* out_actual) {
    switch (op) {
    case IOCTL_BLOCK_GET_INFO: {
        // Copy out the cached block_info_t for this partition.
        block_info_t* info = static_cast<block_info_t*>(reply);
        if (max < sizeof(*info))
            return ZX_ERR_BUFFER_TOO_SMALL;
        fbl::AutoLock lock(&lock_);
        if (IsKilledLocked())
            return ZX_ERR_BAD_STATE;
        memcpy(info, &info_, sizeof(*info));
        *out_actual = sizeof(*info);
        return ZX_OK;
    }
    case IOCTL_BLOCK_FVM_VSLICE_QUERY: {
        // For each requested vslice start, report how many consecutive
        // vslices share its allocation state (via CheckSlices).
        if (cmdlen < sizeof(query_request_t)) {
            return ZX_ERR_BUFFER_TOO_SMALL;
        }

        if (max < sizeof(query_response_t)) {
            return ZX_ERR_BUFFER_TOO_SMALL;
        }

        const query_request_t* request = static_cast<const query_request_t*>(cmd);

        // Cap the number of ranges a single query may request.
        if (request->count > MAX_FVM_VSLICE_REQUESTS) {
           return ZX_ERR_BUFFER_TOO_SMALL;
        }

        query_response_t* response = static_cast<query_response_t*>(reply);
        response->count = 0;
        for (size_t i = 0; i < request->count; i++) {
            zx_status_t status;
            // Abort on the first failing range; response->count reflects only
            // the ranges successfully filled in.
            if ((status = CheckSlices(request->vslice_start[i], &response->vslice_range[i].count,
                                      &response->vslice_range[i].allocated)) != ZX_OK) {
                return status;
            }
            response->count++;
        }

        *out_actual = sizeof(query_response_t);
        return ZX_OK;
    }
    case IOCTL_BLOCK_FVM_QUERY: {
        // Volume-manager-wide geometry query; no per-partition lock needed.
        if (max < sizeof(fvm_info_t)) {
            return ZX_ERR_BUFFER_TOO_SMALL;
        }
        fvm_info_t* info = static_cast<fvm_info_t*>(reply);
        mgr_->Query(info);
        *out_actual = sizeof(fvm_info_t);
        return ZX_OK;
    }
    case IOCTL_BLOCK_GET_TYPE_GUID: {
        // Copy the partition's type GUID out of the allocated VPart entry.
        char* guid = static_cast<char*>(reply);
        if (max < FVM_GUID_LEN)
            return ZX_ERR_BUFFER_TOO_SMALL;
        fbl::AutoLock lock(&lock_);
        if (IsKilledLocked())
            return ZX_ERR_BAD_STATE;
        memcpy(guid, mgr_->GetAllocatedVPartEntry(entry_index_)->type, FVM_GUID_LEN);
        *out_actual = FVM_GUID_LEN;
        return ZX_OK;
    }
    case IOCTL_BLOCK_GET_PARTITION_GUID: {
        // Copy the partition's instance GUID out of the allocated VPart entry.
        char* guid = static_cast<char*>(reply);
        if (max < FVM_GUID_LEN)
            return ZX_ERR_BUFFER_TOO_SMALL;
        fbl::AutoLock lock(&lock_);
        if (IsKilledLocked())
            return ZX_ERR_BAD_STATE;
        memcpy(guid, mgr_->GetAllocatedVPartEntry(entry_index_)->guid, FVM_GUID_LEN);
        *out_actual = FVM_GUID_LEN;
        return ZX_OK;
    }
    case IOCTL_BLOCK_GET_NAME: {
        // The stored name is not guaranteed NUL-terminated, so require room
        // for FVM_NAME_LEN + 1 and terminate explicitly before strlen.
        char* name = static_cast<char*>(reply);
        if (max < FVM_NAME_LEN + 1)
            return ZX_ERR_BUFFER_TOO_SMALL;
        fbl::AutoLock lock(&lock_);
        if (IsKilledLocked())
            return ZX_ERR_BAD_STATE;
        memcpy(name, mgr_->GetAllocatedVPartEntry(entry_index_)->name, FVM_NAME_LEN);
        name[FVM_NAME_LEN] = 0;
        *out_actual = strlen(name);
        return ZX_OK;
    }
    case IOCTL_DEVICE_SYNC: {
        // Propagate sync to parent device
        return mgr_->Flush();
    }
    case IOCTL_BLOCK_FVM_EXTEND: {
        // Allocate [offset, offset + length) vslices; a zero-length request
        // is a successful no-op.
        if (cmdlen < sizeof(extend_request_t))
            return ZX_ERR_BUFFER_TOO_SMALL;
        const extend_request_t* request = static_cast<const extend_request_t*>(cmd);
        zx_status_t status;
        if ((status = RequestBoundCheck(request, mgr_->VSliceMax())) != ZX_OK) {
            return status;
        } else if (request->length == 0) {
            return ZX_OK;
        }
        return mgr_->AllocateSlices(this, request->offset, request->length);
    }
    case IOCTL_BLOCK_FVM_SHRINK: {
        // Free [offset, offset + length) vslices; a zero-length request is a
        // successful no-op.
        if (cmdlen < sizeof(extend_request_t))
            return ZX_ERR_BUFFER_TOO_SMALL;
        const extend_request_t* request = static_cast<const extend_request_t*>(cmd);
        zx_status_t status;
        if ((status = RequestBoundCheck(request, mgr_->VSliceMax())) != ZX_OK) {
            return status;
        } else if (request->length == 0) {
            return ZX_OK;
        }
        return mgr_->FreeSlices(this, request->offset, request->length);
    }
    case IOCTL_BLOCK_FVM_DESTROY_PARTITION: {
        // Destroy the partition by freeing every possible vslice.
        return mgr_->FreeSlices(this, 0, mgr_->VSliceMax());
    }
    default:
        return ZX_ERR_NOT_SUPPORTED;
    }
}
1001
// Shared bookkeeping for a block op that was split into multiple
// sub-transactions (one per slice; see VPartition::BlockQueue). Allocated in
// BlockQueue and deleted by multi_txn_completion when the final sub-txn
// completes.
typedef struct multi_txn_state {
    multi_txn_state(size_t total, block_op_t* txn)
        : txns_completed(0), txns_total(total), status(ZX_OK), original(txn) {}

    fbl::Mutex lock;
    size_t txns_completed TA_GUARDED(lock); // Sub-txns finished so far.
    size_t txns_total TA_GUARDED(lock);     // Total sub-txns issued.
    zx_status_t status TA_GUARDED(lock);    // First non-OK completion status.
    block_op_t* original TA_GUARDED(lock);  // The caller's original request.
} multi_txn_state_t;
1012
1013static void multi_txn_completion(block_op_t* txn, zx_status_t status) {
1014    multi_txn_state_t* state = static_cast<multi_txn_state_t*>(txn->cookie);
1015    bool last_txn = false;
1016    {
1017        fbl::AutoLock lock(&state->lock);
1018        state->txns_completed++;
1019        if (state->status == ZX_OK && status != ZX_OK) {
1020            state->status = status;
1021        }
1022        if (state->txns_completed == state->txns_total) {
1023            last_txn = true;
1024            state->original->completion_cb(state->original, state->status);
1025        }
1026    }
1027
1028    if (last_txn) {
1029        delete state;
1030    }
1031    delete[] txn;
1032}
1033
1034void VPartition::BlockQueue(block_op_t* txn) {
1035    ZX_DEBUG_ASSERT(mgr_->BlockOpSize() > 0);
1036    switch (txn->command & BLOCK_OP_MASK) {
1037    case BLOCK_OP_READ:
1038    case BLOCK_OP_WRITE:
1039        break;
1040    // Pass-through operations
1041    case BLOCK_OP_FLUSH:
1042        mgr_->Queue(txn);
1043        return;
1044    default:
1045        fprintf(stderr, "[FVM BlockQueue] Unsupported Command: %x\n", txn->command);
1046        txn->completion_cb(txn, ZX_ERR_NOT_SUPPORTED);
1047        return;
1048    }
1049
1050    const uint64_t device_capacity = DdkGetSize() / BlockSize();
1051    if (txn->rw.length == 0) {
1052        txn->completion_cb(txn, ZX_ERR_INVALID_ARGS);
1053        return;
1054    } else if ((txn->rw.offset_dev >= device_capacity) ||
1055               (device_capacity - txn->rw.offset_dev < txn->rw.length)) {
1056        txn->completion_cb(txn, ZX_ERR_OUT_OF_RANGE);
1057        return;
1058    }
1059
1060    const size_t disk_size = mgr_->DiskSize();
1061    const size_t slice_size = mgr_->SliceSize();
1062    const uint64_t blocks_per_slice = slice_size / BlockSize();
1063    // Start, end both inclusive
1064    size_t vslice_start = txn->rw.offset_dev / blocks_per_slice;
1065    size_t vslice_end = (txn->rw.offset_dev + txn->rw.length - 1) / blocks_per_slice;
1066
1067    fbl::AutoLock lock(&lock_);
1068    if (vslice_start == vslice_end) {
1069        // Common case: txn occurs within one slice
1070        uint32_t pslice = SliceGetLocked(vslice_start);
1071        if (pslice == PSLICE_UNALLOCATED) {
1072            txn->completion_cb(txn, ZX_ERR_OUT_OF_RANGE);
1073            return;
1074        }
1075        txn->rw.offset_dev = SliceStart(disk_size, slice_size, pslice) /
1076                BlockSize() + (txn->rw.offset_dev % blocks_per_slice);
1077        mgr_->Queue(txn);
1078        return;
1079    }
1080
1081    // Less common case: txn spans multiple slices
1082
1083    // First, check that all slices are allocated.
1084    // If any are missing, then this txn will fail.
1085    bool contiguous = true;
1086    for (size_t vslice = vslice_start; vslice <= vslice_end; vslice++) {
1087        if (SliceGetLocked(vslice) == PSLICE_UNALLOCATED) {
1088            txn->completion_cb(txn, ZX_ERR_OUT_OF_RANGE);
1089            return;
1090        }
1091        if (vslice != vslice_start && SliceGetLocked(vslice - 1) + 1 != SliceGetLocked(vslice)) {
1092            contiguous = false;
1093        }
1094    }
1095
1096    // Ideal case: slices are contiguous
1097    if (contiguous) {
1098        uint32_t pslice = SliceGetLocked(vslice_start);
1099        txn->rw.offset_dev = SliceStart(disk_size, slice_size, pslice) /
1100                BlockSize() + (txn->rw.offset_dev % blocks_per_slice);
1101        mgr_->Queue(txn);
1102        return;
1103    }
1104
1105    // Harder case: Noncontiguous slices
1106    const size_t txn_count = vslice_end - vslice_start + 1;
1107    fbl::Vector<block_op_t*> txns;
1108    txns.reserve(txn_count);
1109
1110    fbl::AllocChecker ac;
1111    fbl::unique_ptr<multi_txn_state_t> state(new (&ac) multi_txn_state_t(txn_count, txn));
1112    if (!ac.check()) {
1113        txn->completion_cb(txn, ZX_ERR_NO_MEMORY);
1114        return;
1115    }
1116
1117    uint32_t length_remaining = txn->rw.length;
1118    for (size_t i = 0; i < txn_count; i++) {
1119        size_t vslice = vslice_start + i;
1120        uint32_t pslice = SliceGetLocked(vslice);
1121
1122        uint64_t offset_vmo = txn->rw.offset_vmo;
1123        uint64_t length;
1124        if (vslice == vslice_start) {
1125            length = fbl::round_up(txn->rw.offset_dev + 1, blocks_per_slice) - txn->rw.offset_dev;
1126        } else if (vslice == vslice_end) {
1127            length = length_remaining;
1128            offset_vmo += txn->rw.length - length_remaining;
1129        } else {
1130            length = blocks_per_slice;
1131            offset_vmo += txns[0]->rw.length + blocks_per_slice * (i - 1);
1132        }
1133        ZX_DEBUG_ASSERT(length <= blocks_per_slice);
1134        ZX_DEBUG_ASSERT(length <= length_remaining);
1135
1136        txns.push_back(reinterpret_cast<block_op_t*>(new uint8_t[mgr_->BlockOpSize()]));
1137        if (txns[i] == nullptr) {
1138            while (i-- > 0) {
1139                delete[] txns[i];
1140            }
1141            txn->completion_cb(txn, ZX_ERR_NO_MEMORY);
1142            return;
1143        }
1144        memcpy(txns[i], txn, sizeof(*txn));
1145        txns[i]->rw.offset_vmo = offset_vmo;
1146        txns[i]->rw.length = static_cast<uint32_t>(length);
1147        txns[i]->rw.offset_dev = SliceStart(disk_size, slice_size, pslice) / BlockSize();
1148        if (vslice == vslice_start) {
1149            txns[i]->rw.offset_dev += (txn->rw.offset_dev % blocks_per_slice);
1150        }
1151        length_remaining -= txns[i]->rw.length;
1152        txns[i]->completion_cb = multi_txn_completion;
1153        txns[i]->cookie = state.get();
1154    }
1155    ZX_DEBUG_ASSERT(length_remaining == 0);
1156
1157    for (size_t i = 0; i < txn_count; i++) {
1158        mgr_->Queue(txns[i]);
1159    }
1160    // TODO(johngro): ask smklein why it is OK to release this managed pointer.
1161    __UNUSED auto ptr = state.release();
1162}
1163
1164zx_off_t VPartition::DdkGetSize() {
1165    const zx_off_t sz = mgr_->VSliceMax() * mgr_->SliceSize();
1166    // Check for overflow; enforced when loading driver
1167    ZX_DEBUG_ASSERT(sz / mgr_->VSliceMax() == mgr_->SliceSize());
1168    return sz;
1169}
1170
// DDK unbind hook: schedules removal of this virtual partition's device.
void VPartition::DdkUnbind() {
    DdkRemove();
}
1174
// DDK release hook: the devhost has dropped its last reference, so the
// object destroys itself.
void VPartition::DdkRelease() {
    delete this;
}
1178
1179void VPartition::BlockQuery(block_info_t* info_out, size_t* block_op_size_out) {
1180    static_assert(fbl::is_same<decltype(info_out), decltype(&info_)>::value, "Info type mismatch");
1181    memcpy(info_out, &info_, sizeof(info_));
1182    *block_op_size_out = mgr_->BlockOpSize();
1183}
1184
1185} // namespace fvm
1186
1187// C-compatibility definitions
1188
// C-callable entry point used by the driver binding machinery; forwards to
// the C++ implementation.
zx_status_t fvm_bind(zx_device_t* parent) {
    return fvm::VPartitionManager::Bind(parent);
}
1192