1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * Copyright 2023 Red Hat
4 */
5
6#ifndef VDO_TYPES_H
7#define VDO_TYPES_H
8
9#include <linux/bio.h>
10#include <linux/blkdev.h>
11#include <linux/device-mapper.h>
12#include <linux/list.h>
13#include <linux/compiler_attributes.h>
14#include <linux/types.h>
15
16#include "funnel-queue.h"
17
/* A size type in blocks. */
typedef u64 block_count_t;

/* The size of a block. */
typedef u16 block_size_t;

/* A counter for data_vios. */
typedef u16 data_vio_count_t;

/* A height within a tree. */
typedef u8 height_t;

/* The logical block number as used by the consumer. */
typedef u64 logical_block_number_t;

/* The type of the nonce used to identify instances of VDO. */
typedef u64 nonce_t;

/* A size in pages. */
typedef u32 page_count_t;

/* A page number. */
typedef u32 page_number_t;

/*
 * The physical (well, less logical) block number at which the block is found on the underlying
 * device.
 */
typedef u64 physical_block_number_t;

/* A count of tree roots. */
typedef u8 root_count_t;

/* A number of sectors. */
typedef u8 sector_count_t;

/* A sequence number. */
typedef u64 sequence_number_t;

/*
 * The offset of a block within a slab.
 *
 * NOTE(review): unlike every other typedef in this group, this name has no _t suffix.
 */
typedef u32 slab_block_number;

/* A size type in slabs. */
typedef u16 slab_count_t;

/* A slot in a bin or block map page. */
typedef u16 slot_number_t;

/* A thread counter. */
typedef u8 thread_count_t;

/* A thread ID; vdo threads are numbered sequentially from 0. */
typedef u8 thread_id_t;

/* A zone counter. */
typedef u8 zone_count_t;
74
75/* The following enums are persisted on storage, so the values must be preserved. */
76
/*
 * The current operating mode of the VDO. These values are persisted on storage, so the numeric
 * value of each existing state must be preserved.
 */
enum vdo_state {
	VDO_DIRTY = 0,
	VDO_NEW = 1,
	VDO_CLEAN = 2,
	VDO_READ_ONLY_MODE = 3,
	VDO_FORCE_REBUILD = 4,
	VDO_RECOVERING = 5,
	VDO_REPLAYING = 6, /* VDO_REPLAYING is never set anymore, but retained for upgrade */
	VDO_REBUILD_FOR_UPGRADE = 7,

	/*
	 * Keep VDO_STATE_COUNT at the bottom. It has no explicit value, so it is always one more
	 * than the last state listed above it.
	 */
	VDO_STATE_COUNT
};
91
92/**
93 * vdo_state_requires_read_only_rebuild() - Check whether a vdo_state indicates
94 * that a read-only rebuild is required.
95 * @state: The vdo_state to check.
96 *
97 * Return: true if the state indicates a rebuild is required
98 */
99static inline bool __must_check vdo_state_requires_read_only_rebuild(enum vdo_state state)
100{
101	return ((state == VDO_FORCE_REBUILD) || (state == VDO_REBUILD_FOR_UPGRADE));
102}
103
104/**
105 * vdo_state_requires_recovery() - Check whether a vdo state indicates that recovery is needed.
106 * @state: The state to check.
107 *
108 * Return: true if the state indicates a recovery is required
109 */
110static inline bool __must_check vdo_state_requires_recovery(enum vdo_state state)
111{
112	return ((state == VDO_DIRTY) || (state == VDO_REPLAYING) || (state == VDO_RECOVERING));
113}
114
/*
 * The current operation on a physical block (from the point of view of the recovery journal, slab
 * journals, and reference counts).
 */
enum journal_operation {
	VDO_JOURNAL_DATA_REMAPPING = 0,
	VDO_JOURNAL_BLOCK_MAP_REMAPPING = 1,
} __packed;
123
/*
 * Partition IDs encoded in the volume layout in the super block. Because the IDs are encoded in
 * the super block, the explicit values below should not be changed.
 */
enum partition_id {
	VDO_BLOCK_MAP_PARTITION = 0,
	VDO_SLAB_DEPOT_PARTITION = 1,
	VDO_RECOVERY_JOURNAL_PARTITION = 2,
	VDO_SLAB_SUMMARY_PARTITION = 3,
} __packed;
131
/*
 * Metadata types for the vdo. Note that the values start at 1, so a zero value does not name
 * any metadata type.
 */
enum vdo_metadata_type {
	VDO_METADATA_RECOVERY_JOURNAL = 1,
	VDO_METADATA_SLAB_JOURNAL = 2,
	VDO_METADATA_RECOVERY_JOURNAL_2 = 3,
} __packed;
138
/* A position in the block map where a block map entry is stored. */
struct block_map_slot {
	/* NOTE(review): presumably the physical block number of the block map page; confirm */
	physical_block_number_t pbn;
	/* NOTE(review): presumably the index of the entry within that page; confirm */
	slot_number_t slot;
};
144
/*
 * Each five-byte block map entry reserves four bits for a mapping state. The state separates
 * logical blocks which are unmapped or discarded (and so treated as mapped to the zero block)
 * from entries which have been mapped to a physical block, including the zero block.
 *
 * FIXME: these should maybe be defines.
 */
enum block_mapping_state {
	/* Must be zero to be the default value */
	VDO_MAPPING_STATE_UNMAPPED = 0,
	/* A normal (uncompressed) block */
	VDO_MAPPING_STATE_UNCOMPRESSED = 1,
	/* Compressed in slot 0 */
	VDO_MAPPING_STATE_COMPRESSED_BASE = 2,
	/* Compressed in slot 13 */
	VDO_MAPPING_STATE_COMPRESSED_MAX = 15,
};

/* The number of mapping states from COMPRESSED_BASE through COMPRESSED_MAX, inclusive. */
enum {
	VDO_MAX_COMPRESSION_SLOTS =
		VDO_MAPPING_STATE_COMPRESSED_MAX - VDO_MAPPING_STATE_COMPRESSED_BASE + 1,
};
163
164
/* A physical block number together with a block mapping state. */
struct data_location {
	physical_block_number_t pbn;
	enum block_mapping_state state;
};
169
/*
 * The configuration of a single slab derived from the configured block size and slab size. Every
 * member is a block_count_t, i.e. a quantity measured in blocks.
 */
struct slab_config {
	/* Total number of blocks in the slab */
	block_count_t slab_blocks;
	/* Number of blocks available for data */
	block_count_t data_blocks;
	/* Number of blocks for reference counts */
	block_count_t reference_count_blocks;
	/* Number of blocks for the slab journal */
	block_count_t slab_journal_blocks;
	/*
	 * Number of blocks after which the slab journal starts pushing out a reference_block for
	 * each new entry it receives.
	 */
	block_count_t slab_journal_flushing_threshold;
	/*
	 * Number of blocks after which the slab journal pushes out all reference_blocks and makes
	 * all vios wait.
	 */
	block_count_t slab_journal_blocking_threshold;
	/* Number of blocks after which the slab must be scrubbed before coming online. */
	block_count_t slab_journal_scrubbing_threshold;
} __packed;
193
/*
 * This structure is memcmp'd for equality. Keep it packed and don't add any fields that are not
 * properly set in both extant and parsed configs.
 *
 * A byte-wise comparison also looks at any padding bytes, so a new member must not introduce
 * padding; all of the current members share the same type (unsigned int).
 */
struct thread_count_config {
	unsigned int bio_ack_threads;
	unsigned int bio_threads;
	unsigned int bio_rotation_interval;
	unsigned int cpu_threads;
	unsigned int logical_zones;
	unsigned int physical_zones;
	unsigned int hash_zones;
} __packed;
207
/*
 * The configuration of a vdo device, along with the device-mapper target which owns the config
 * and the device-mapper device which the config owns.
 */
struct device_config {
	struct dm_target *owning_target;
	struct dm_dev *owned_device;
	struct vdo *vdo;
	/* All configs referencing a layer are kept on a list in the layer */
	struct list_head config_list;
	/* NOTE(review): presumably the unparsed text this config was built from; confirm */
	char *original_string;
	unsigned int version;
	char *parent_device_name;
	block_count_t physical_blocks;
	/*
	 * This is the number of logical blocks from VDO's internal point of view. It is the number
	 * of 4K blocks regardless of the value of the logical_block_size parameter below.
	 */
	block_count_t logical_blocks;
	unsigned int logical_block_size;
	unsigned int cache_size;
	unsigned int block_map_maximum_age;
	bool deduplication;
	bool compression;
	/* Compared with memcmp(); see the comment on struct thread_count_config */
	struct thread_count_config thread_counts;
	block_count_t max_discard_blocks;
};
231
/* The kinds of completion; stored in the type field of struct vdo_completion. */
enum vdo_completion_type {
	/* Keep VDO_UNSET_COMPLETION_TYPE at the top so that it remains the zero value. */
	VDO_UNSET_COMPLETION_TYPE = 0,
	VDO_ACTION_COMPLETION,
	VDO_ADMIN_COMPLETION,
	VDO_BLOCK_ALLOCATOR_COMPLETION,
	VDO_DATA_VIO_POOL_COMPLETION,
	VDO_DECREMENT_COMPLETION,
	VDO_FLUSH_COMPLETION,
	VDO_FLUSH_NOTIFICATION_COMPLETION,
	VDO_GENERATION_FLUSHED_COMPLETION,
	VDO_HASH_ZONE_COMPLETION,
	VDO_HASH_ZONES_COMPLETION,
	VDO_LOCK_COUNTER_COMPLETION,
	VDO_PAGE_COMPLETION,
	VDO_READ_ONLY_MODE_COMPLETION,
	VDO_REPAIR_COMPLETION,
	VDO_SYNC_COMPLETION,
	VIO_COMPLETION,
} __packed;
252
/* Forward declaration: vdo_action_fn refers to the struct before its definition below. */
struct vdo_completion;

/**
 * typedef vdo_action_fn - An asynchronous VDO operation.
 * @completion: The completion of the operation.
 *
 * Used as the type of both the callback and the error_handler of a struct vdo_completion.
 */
typedef void (*vdo_action_fn)(struct vdo_completion *completion);
260
/*
 * Priority values for completions. The constants are grouped by name prefix and listed in
 * ascending order within each group; several names share a numeric value.
 */
enum vdo_completion_priority {
	/* BIO_ACK_Q_* */
	BIO_ACK_Q_ACK_PRIORITY = 0,
	BIO_ACK_Q_MAX_PRIORITY = 0,

	/* BIO_Q_* */
	BIO_Q_COMPRESSED_DATA_PRIORITY = 0,
	BIO_Q_DATA_PRIORITY = 0,
	BIO_Q_METADATA_PRIORITY = 1,
	BIO_Q_VERIFY_PRIORITY = 1,
	BIO_Q_FLUSH_PRIORITY = 2,
	BIO_Q_HIGH_PRIORITY = 2,
	BIO_Q_MAX_PRIORITY = 2,

	/* CPU_Q_* */
	CPU_Q_COMPLETE_VIO_PRIORITY = 0,
	CPU_Q_COMPLETE_READ_PRIORITY = 0,
	CPU_Q_COMPRESS_BLOCK_PRIORITY = 0,
	CPU_Q_EVENT_REPORTER_PRIORITY = 0,
	CPU_Q_HASH_BLOCK_PRIORITY = 0,
	CPU_Q_MAX_PRIORITY = 0,

	/* UDS_Q_* */
	UDS_Q_PRIORITY = 0,
	UDS_Q_MAX_PRIORITY = 0,

	/* VDO_DEFAULT_Q_* */
	VDO_DEFAULT_Q_MAP_BIO_PRIORITY = 0,
	VDO_DEFAULT_Q_COMPLETION_PRIORITY = 1,
	VDO_DEFAULT_Q_VIO_CALLBACK_PRIORITY = 1,
	VDO_DEFAULT_Q_FLUSH_PRIORITY = 2,
	VDO_DEFAULT_Q_SYNC_PRIORITY = 2,
	VDO_DEFAULT_Q_MAX_PRIORITY = 2,

	/* The maximum allowable priority */
	VDO_WORK_Q_MAX_PRIORITY = 2,
	/* A value which must be out of range for a valid priority */
	VDO_WORK_Q_DEFAULT_PRIORITY = VDO_WORK_Q_MAX_PRIORITY + 1,
};
290
/*
 * The state of an asynchronous operation: its type and result, the callbacks to invoke and the
 * thread on which to invoke them, and the linkage used to place it on a work queue.
 */
struct vdo_completion {
	/* The type of completion this is */
	enum vdo_completion_type type;

	/*
	 * Set to true once the processing of the operation is complete. This flag should not be
	 * used by waiters external to the VDO base as it is used to gate calling the callback.
	 */
	bool complete;

	/*
	 * If true, queue this completion on the next callback invocation, even if it is already
	 * running on the correct thread.
	 */
	bool requeue;

	/* The ID of the thread which should run the next callback */
	thread_id_t callback_thread_id;

	/* The result of the operation */
	int result;

	/* The VDO on which this completion operates */
	struct vdo *vdo;

	/* The callback which will be called once the operation is complete */
	vdo_action_fn callback;

	/* Callback which, if set, will be called if an error result is set */
	vdo_action_fn error_handler;

	/* The parent object, if any, that spawned this completion */
	void *parent;

	/* Entry link for lock-free work queue */
	struct funnel_queue_entry work_queue_entry_link;
	/* The priority of this completion; see enum vdo_completion_priority */
	enum vdo_completion_priority priority;
	/* NOTE(review): presumably the work queue currently holding this completion; confirm */
	struct vdo_work_queue *my_queue;
};
330
/* Forward declarations of structures which are not defined in this header. */
struct block_allocator;
struct data_vio;
struct vdo;
struct vdo_config;
335
/* The kinds of vio, recorded for statistics and instrumentation. */
enum vio_type {
	VIO_TYPE_UNINITIALIZED = 0,
	VIO_TYPE_DATA = 1,
	VIO_TYPE_BLOCK_ALLOCATOR = 2,
	VIO_TYPE_BLOCK_MAP = 3,
	VIO_TYPE_BLOCK_MAP_INTERIOR = 4,
	VIO_TYPE_GEOMETRY = 5,
	VIO_TYPE_PARTITION_COPY = 6,
	VIO_TYPE_RECOVERY_JOURNAL = 7,
	VIO_TYPE_SLAB_JOURNAL = 8,
	VIO_TYPE_SLAB_SUMMARY = 9,
	VIO_TYPE_SUPER_BLOCK = 10,
} __packed;
350
/*
 * Priority levels for asynchronous I/O operations performed on a vio. Data and compressed data
 * share the lowest level; metadata and high priority are each one level above the previous.
 */
enum vio_priority {
	VIO_PRIORITY_LOW = 0,
	VIO_PRIORITY_DATA = VIO_PRIORITY_LOW,
	VIO_PRIORITY_COMPRESSED_DATA = VIO_PRIORITY_LOW,
	VIO_PRIORITY_METADATA = VIO_PRIORITY_LOW + 1,
	VIO_PRIORITY_HIGH = VIO_PRIORITY_METADATA + 1,
} __packed;
359
/*
 * A wrapper for a bio. All I/O to the storage below a vdo is conducted via vios.
 *
 * The completion is the first member of the structure.
 */
struct vio {
	/* The completion for this vio */
	struct vdo_completion completion;

	/* The bio zone in which I/O should be processed */
	zone_count_t bio_zone;

	/* The queueing priority of the vio operation */
	enum vio_priority priority;

	/* The vio type is used for statistics and instrumentation. */
	enum vio_type type;

	/* The size of this vio in blocks */
	unsigned int block_count;

	/* The data being read or written. */
	char *data;

	/* The VDO-owned bio to use for all IO for this vio */
	struct bio *bio;

	/*
	 * A list of enqueued bios with consecutive block numbers, stored by vdo_submit_bio() under
	 * the first-enqueued vio. The other vios are found via their bio entries in this list, and
	 * are not added to the work queue as separate completions.
	 */
	struct bio_list bios_merged;
};
392
393#endif /* VDO_TYPES_H */
394