/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Portions Copyright (C) 1992 Drew Eckhardt
 */
#ifndef _LINUX_BLKDEV_H
#define _LINUX_BLKDEV_H

#include <linux/types.h>
#include <linux/blk_types.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/minmax.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/bio.h>
#include <linux/gfp.h>
#include <linux/kdev_t.h>
#include <linux/rcupdate.h>
#include <linux/percpu-refcount.h>
#include <linux/blkzoned.h>
#include <linux/sched.h>
#include <linux/sbitmap.h>
#include <linux/uuid.h>
#include <linux/xarray.h>
#include <linux/file.h>

struct module;
struct request_queue;
struct elevator_queue;
struct blk_trace;
struct request;
struct sg_io_hdr;
struct blkcg_gq;
struct blk_flush_queue;
struct kiocb;
struct pr_ops;
struct rq_qos;
struct blk_queue_stats;
struct blk_stat_callback;
struct blk_crypto_profile;

extern const struct device_type disk_type;
extern const struct device_type part_type;
extern const struct class block_class;

/*
 * Maximum number of blkcg policies allowed to be registered concurrently.
 * Defined here to simplify include dependency.
 */
#define BLKCG_MAX_POLS		6

#define DISK_MAX_PARTS			256
#define DISK_NAME_LEN			32

#define PARTITION_META_INFO_VOLNAMELTH	64
/*
 * Enough for the string representation of any kind of UUID plus NULL.
 * EFI UUID is 36 characters. MSDOS UUID is 11 characters.
 */
#define PARTITION_META_INFO_UUIDLTH	(UUID_STRING_LEN + 1)

struct partition_meta_info {
	char uuid[PARTITION_META_INFO_UUIDLTH];
	u8 volname[PARTITION_META_INFO_VOLNAMELTH];
};

/**
 * DOC: genhd capability flags
 *
 * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to
 * removable media. When set, the device remains present even when media is
 * not inserted. Shall not be set for devices which are removed entirely when
 * the media is removed.
 *
 * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events,
 * doesn't appear in sysfs, and can't be opened from userspace or using
 * blkdev_get*. Used for the underlying components of multipath devices.
 *
 * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not
 * scan for partitions from add_disk, and users can't add partitions manually.
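 *
 * As an illustrative sketch (editorial example, not taken from this file;
 * error handling is abbreviated), a driver that never wants partition
 * scanning would set the flag before registration::
 *
 *	disk->flags |= GENHD_FL_NO_PART;
 *	err = add_disk(disk);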
 *
 */
enum {
	GENHD_FL_REMOVABLE			= 1 << 0,
	GENHD_FL_HIDDEN				= 1 << 1,
	GENHD_FL_NO_PART			= 1 << 2,
};

enum {
	DISK_EVENT_MEDIA_CHANGE			= 1 << 0, /* media changed */
	DISK_EVENT_EJECT_REQUEST		= 1 << 1, /* eject requested */
};

enum {
	/* Poll even if events_poll_msecs is unset */
	DISK_EVENT_FLAG_POLL			= 1 << 0,
	/* Forward events to udev */
	DISK_EVENT_FLAG_UEVENT			= 1 << 1,
	/* Block event polling when open for exclusive write */
	DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE	= 1 << 2,
};

struct disk_events;
struct badblocks;

struct blk_integrity {
	const struct blk_integrity_profile	*profile;
	unsigned char				flags;
	unsigned char				tuple_size;
	unsigned char				pi_offset;
	unsigned char				interval_exp;
	unsigned char				tag_size;
};

typedef unsigned int __bitwise blk_mode_t;

/* open for reading */
#define BLK_OPEN_READ			((__force blk_mode_t)(1 << 0))
/* open for writing */
#define BLK_OPEN_WRITE			((__force blk_mode_t)(1 << 1))
/* open exclusively (vs other exclusive openers) */
#define BLK_OPEN_EXCL			((__force blk_mode_t)(1 << 2))
/* opened with O_NDELAY */
#define BLK_OPEN_NDELAY			((__force blk_mode_t)(1 << 3))
/* open for "writes" only for ioctls (special hack for floppy.c) */
#define BLK_OPEN_WRITE_IOCTL		((__force blk_mode_t)(1 << 4))
/* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */
#define BLK_OPEN_RESTRICT_WRITES	((__force blk_mode_t)(1 << 5))

struct gendisk {
	/*
	 * major/first_minor/minors should not be set by any new driver, the
	 * block core will take care of allocating them automatically.
	 */
	int major;
	int first_minor;
	int minors;

	char disk_name[DISK_NAME_LEN];	/* name of major driver */

	unsigned short events;		/* supported events */
	unsigned short event_flags;	/* flags related to event processing */

	struct xarray part_tbl;
	struct block_device *part0;

	const struct block_device_operations *fops;
	struct request_queue *queue;
	void *private_data;

	struct bio_set bio_split;

	int flags;
	unsigned long state;
#define GD_NEED_PART_SCAN		0
#define GD_READ_ONLY			1
#define GD_DEAD				2
#define GD_NATIVE_CAPACITY		3
#define GD_ADDED			4
#define GD_SUPPRESS_PART_SCAN		5
#define GD_OWNS_QUEUE			6

	struct mutex open_mutex;	/* open/close mutex */
	unsigned open_partitions;	/* number of open partitions */

	struct backing_dev_info	*bdi;
	struct kobject queue_kobj;	/* the queue/ directory */
	struct kobject *slave_dir;
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
	struct list_head slave_bdevs;
#endif
	struct timer_rand_state *random;
	atomic_t sync_io;		/* RAID */
	struct disk_events *ev;

#ifdef CONFIG_BLK_DEV_ZONED
	/*
	 * Zoned block device information for request dispatch control.
	 * nr_zones is the total number of zones of the device. This is always
	 * 0 for regular block devices. conv_zones_bitmap is a bitmap of
	 * nr_zones bits which indicates if a zone is conventional (bit set) or
	 * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
	 * bits which indicates if a zone is write locked, that is, if a write
	 * request targeting the zone was dispatched.
	 *
	 * Reads of this information must be protected with blk_queue_enter() /
	 * blk_queue_exit(). Modifying this information is only allowed while
	 * no requests are being processed.
	 * See also blk_mq_freeze_queue() and blk_mq_unfreeze_queue().
	 */
	unsigned int		nr_zones;
	unsigned long		*conv_zones_bitmap;
	unsigned long		*seq_zones_wlock;
#endif /* CONFIG_BLK_DEV_ZONED */

#if IS_ENABLED(CONFIG_CDROM)
	struct cdrom_device_info *cdi;
#endif
	int node_id;
	struct badblocks *bb;
	struct lockdep_map lockdep_map;
	u64 diskseq;
	blk_mode_t open_mode;

	/*
	 * Independent sector access ranges. This is always NULL for
	 * devices that do not have multiple independent access ranges.
	 */
	struct blk_independent_access_ranges *ia_ranges;
};

static inline bool disk_live(struct gendisk *disk)
{
	return !inode_unhashed(disk->part0->bd_inode);
}

/**
 * disk_openers - returns how many openers there are for a disk
 * @disk: disk to check
 *
 * This returns the number of openers for a disk. Note that this value is only
 * stable if disk->open_mutex is held.
 *
 * Note: Due to a quirk in the block layer open code, each open partition is
 * only counted once even if there are multiple openers.
 */
static inline unsigned int disk_openers(struct gendisk *disk)
{
	return atomic_read(&disk->part0->bd_openers);
}

/*
 * The gendisk is refcounted by the part0 block_device, and the bd_device
 * therein is also used for device model presentation in sysfs.
 */
#define dev_to_disk(device) \
	(dev_to_bdev(device)->bd_disk)
#define disk_to_dev(disk) \
	(&((disk)->part0->bd_device))

#if IS_REACHABLE(CONFIG_CDROM)
#define disk_to_cdi(disk)	((disk)->cdi)
#else
#define disk_to_cdi(disk)	NULL
#endif

static inline dev_t disk_devt(struct gendisk *disk)
{
	return MKDEV(disk->major, disk->first_minor);
}

static inline int blk_validate_block_size(unsigned long bsize)
{
	if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
		return -EINVAL;

	return 0;
}

static inline bool blk_op_is_passthrough(blk_opf_t op)
{
	op &= REQ_OP_MASK;
	return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
}

/*
 * BLK_BOUNCE_NONE:	never bounce (default)
 * BLK_BOUNCE_HIGH:	bounce all highmem pages
 */
enum blk_bounce {
	BLK_BOUNCE_NONE,
	BLK_BOUNCE_HIGH,
};

struct queue_limits {
	enum blk_bounce		bounce;
	unsigned long		seg_boundary_mask;
	unsigned long		virt_boundary_mask;

	unsigned int		max_hw_sectors;
	unsigned int		max_dev_sectors;
	unsigned int		chunk_sectors;
	unsigned int		max_sectors;
	unsigned int		max_user_sectors;
	unsigned int		max_segment_size;
	unsigned int		physical_block_size;
	unsigned int		logical_block_size;
	unsigned int		alignment_offset;
	unsigned int		io_min;
	unsigned int		io_opt;
	unsigned int		max_discard_sectors;
	unsigned int		max_hw_discard_sectors;
	unsigned int		max_user_discard_sectors;
	unsigned int		max_secure_erase_sectors;
	unsigned int		max_write_zeroes_sectors;
	unsigned int		max_zone_append_sectors;
	unsigned int		discard_granularity;
	unsigned int		discard_alignment;
	unsigned int		zone_write_granularity;

	unsigned short		max_segments;
	unsigned short		max_integrity_segments;
	unsigned short		max_discard_segments;

	unsigned char		misaligned;
	unsigned char		discard_misaligned;
	unsigned char		raid_partial_stripes_expensive;
	bool			zoned;
	unsigned int		max_open_zones;
	unsigned int		max_active_zones;

	/*
	 * Drivers that set dma_alignment to
less than 511 must be prepared to 316 * handle individual bvec's that are not a multiple of a SECTOR_SIZE 317 * due to possible offsets. 318 */ 319 unsigned int dma_alignment; 320}; 321 322typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, 323 void *data); 324 325void disk_set_zoned(struct gendisk *disk); 326 327#define BLK_ALL_ZONES ((unsigned int)-1) 328int blkdev_report_zones(struct block_device *bdev, sector_t sector, 329 unsigned int nr_zones, report_zones_cb cb, void *data); 330int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, 331 sector_t sectors, sector_t nr_sectors); 332int blk_revalidate_disk_zones(struct gendisk *disk, 333 void (*update_driver_data)(struct gendisk *disk)); 334 335/* 336 * Independent access ranges: struct blk_independent_access_range describes 337 * a range of contiguous sectors that can be accessed using device command 338 * execution resources that are independent from the resources used for 339 * other access ranges. This is typically found with single-LUN multi-actuator 340 * HDDs where each access range is served by a different set of heads. 341 * The set of independent ranges supported by the device is defined using 342 * struct blk_independent_access_ranges. The independent ranges must not overlap 343 * and must include all sectors within the disk capacity (no sector holes 344 * allowed). 345 * For a device with multiple ranges, requests targeting sectors in different 346 * ranges can be executed in parallel. A request can straddle an access range 347 * boundary. 348 */ 349struct blk_independent_access_range { 350 struct kobject kobj; 351 sector_t sector; 352 sector_t nr_sectors; 353}; 354 355struct blk_independent_access_ranges { 356 struct kobject kobj; 357 bool sysfs_registered; 358 unsigned int nr_ia_ranges; 359 struct blk_independent_access_range ia_range[]; 360}; 361 362struct request_queue { 363 /* 364 * The queue owner gets to use this for whatever they like. 365 * ll_rw_blk doesn't touch it. 366 */ 367 void *queuedata; 368 369 struct elevator_queue *elevator; 370 371 const struct blk_mq_ops *mq_ops; 372 373 /* sw queues */ 374 struct blk_mq_ctx __percpu *queue_ctx; 375 376 /* 377 * various queue flags, see QUEUE_* below 378 */ 379 unsigned long queue_flags; 380 381 unsigned int rq_timeout; 382 383 unsigned int queue_depth; 384 385 refcount_t refs; 386 387 /* hw dispatch queues */ 388 unsigned int nr_hw_queues; 389 struct xarray hctx_table; 390 391 struct percpu_ref q_usage_counter; 392 393 struct request *last_merge; 394 395 spinlock_t queue_lock; 396 397 int quiesce_depth; 398 399 struct gendisk *disk; 400 401 /* 402 * mq queue kobject 403 */ 404 struct kobject *mq_kobj; 405 406 struct queue_limits limits; 407 408#ifdef CONFIG_BLK_DEV_INTEGRITY 409 struct blk_integrity integrity; 410#endif /* CONFIG_BLK_DEV_INTEGRITY */ 411 412#ifdef CONFIG_PM 413 struct device *dev; 414 enum rpm_status rpm_status; 415#endif 416 417 /* 418 * Number of contexts that have called blk_set_pm_only(). If this 419 * counter is above zero then only RQF_PM requests are processed. 420 */ 421 atomic_t pm_only; 422 423 struct blk_queue_stats *stats; 424 struct rq_qos *rq_qos; 425 struct mutex rq_qos_mutex; 426 427 /* 428 * ida allocated id for this queue. Used to index queues from 429 * ioctx. 
430 */ 431 int id; 432 433 unsigned int dma_pad_mask; 434 435 /* 436 * queue settings 437 */ 438 unsigned long nr_requests; /* Max # of requests */ 439 440#ifdef CONFIG_BLK_INLINE_ENCRYPTION 441 struct blk_crypto_profile *crypto_profile; 442 struct kobject *crypto_kobject; 443#endif 444 445 struct timer_list timeout; 446 struct work_struct timeout_work; 447 448 atomic_t nr_active_requests_shared_tags; 449 450 unsigned int required_elevator_features; 451 452 struct blk_mq_tags *sched_shared_tags; 453 454 struct list_head icq_list; 455#ifdef CONFIG_BLK_CGROUP 456 DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); 457 struct blkcg_gq *root_blkg; 458 struct list_head blkg_list; 459 struct mutex blkcg_mutex; 460#endif 461 462 int node; 463 464 spinlock_t requeue_lock; 465 struct list_head requeue_list; 466 struct delayed_work requeue_work; 467 468#ifdef CONFIG_BLK_DEV_IO_TRACE 469 struct blk_trace __rcu *blk_trace; 470#endif 471 /* 472 * for flush operations 473 */ 474 struct blk_flush_queue *fq; 475 struct list_head flush_list; 476 477 struct mutex sysfs_lock; 478 struct mutex sysfs_dir_lock; 479 struct mutex limits_lock; 480 481 /* 482 * for reusing dead hctx instance in case of updating 483 * nr_hw_queues 484 */ 485 struct list_head unused_hctx_list; 486 spinlock_t unused_hctx_lock; 487 488 int mq_freeze_depth; 489 490#ifdef CONFIG_BLK_DEV_THROTTLING 491 /* Throttle data */ 492 struct throtl_data *td; 493#endif 494 struct rcu_head rcu_head; 495 wait_queue_head_t mq_freeze_wq; 496 /* 497 * Protect concurrent access to q_usage_counter by 498 * percpu_ref_kill() and percpu_ref_reinit(). 499 */ 500 struct mutex mq_freeze_lock; 501 502 struct blk_mq_tag_set *tag_set; 503 struct list_head tag_set_list; 504 505 struct dentry *debugfs_dir; 506 struct dentry *sched_debugfs_dir; 507 struct dentry *rqos_debugfs_dir; 508 /* 509 * Serializes all debugfs metadata operations using the above dentries. 
510 */ 511 struct mutex debugfs_mutex; 512 513 bool mq_sysfs_init_done; 514}; 515 516/* Keep blk_queue_flag_name[] in sync with the definitions below */ 517#define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ 518#define QUEUE_FLAG_DYING 1 /* queue being torn down */ 519#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ 520#define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ 521#define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ 522#define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */ 523#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ 524#define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ 525#define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ 526#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ 527#define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ 528#define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ 529#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */ 530#define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ 531#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ 532#define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ 533#define QUEUE_FLAG_WC 17 /* Write back caching */ 534#define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ 535#define QUEUE_FLAG_DAX 19 /* device supports DAX */ 536#define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ 537#define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ 538#define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ 539#define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ 540#define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ 541#define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ 542#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ 543#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ 544#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ 545#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ 546 547#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \ 548 (1UL << QUEUE_FLAG_SAME_COMP) | \ 549 (1UL << QUEUE_FLAG_NOWAIT)) 550 551void blk_queue_flag_set(unsigned int flag, struct request_queue *q); 552void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); 553bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); 554 555#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 556#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) 557#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) 558#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) 559#define blk_queue_noxmerges(q) \ 560 test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) 561#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) 562#define blk_queue_stable_writes(q) \ 563 test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags) 564#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) 565#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) 566#define blk_queue_zone_resetall(q) \ 567 test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) 568#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) 569#define blk_queue_pci_p2pdma(q) \ 570 test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) 571#ifdef CONFIG_BLK_RQ_ALLOC_TIME 572#define blk_queue_rq_alloc_time(q) \ 573 
test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags) 574#else 575#define blk_queue_rq_alloc_time(q) false 576#endif 577 578#define blk_noretry_request(rq) \ 579 ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ 580 REQ_FAILFAST_DRIVER)) 581#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) 582#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) 583#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) 584#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) 585#define blk_queue_skip_tagset_quiesce(q) \ 586 test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags) 587 588extern void blk_set_pm_only(struct request_queue *q); 589extern void blk_clear_pm_only(struct request_queue *q); 590 591#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) 592 593#define dma_map_bvec(dev, bv, dir, attrs) \ 594 dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ 595 (dir), (attrs)) 596 597static inline bool queue_is_mq(struct request_queue *q) 598{ 599 return q->mq_ops; 600} 601 602#ifdef CONFIG_PM 603static inline enum rpm_status queue_rpm_status(struct request_queue *q) 604{ 605 return q->rpm_status; 606} 607#else 608static inline enum rpm_status queue_rpm_status(struct request_queue *q) 609{ 610 return RPM_ACTIVE; 611} 612#endif 613 614static inline bool blk_queue_is_zoned(struct request_queue *q) 615{ 616 return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned; 617} 618 619#ifdef CONFIG_BLK_DEV_ZONED 620unsigned int bdev_nr_zones(struct block_device *bdev); 621 622static inline unsigned int disk_nr_zones(struct gendisk *disk) 623{ 624 return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0; 625} 626 627static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) 628{ 629 if (!blk_queue_is_zoned(disk->queue)) 630 return 0; 631 return sector >> ilog2(disk->queue->limits.chunk_sectors); 632} 633 634static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) 635{ 636 if (!blk_queue_is_zoned(disk->queue)) 637 return false; 638 if (!disk->conv_zones_bitmap) 639 return true; 640 return !test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap); 641} 642 643static inline void disk_set_max_open_zones(struct gendisk *disk, 644 unsigned int max_open_zones) 645{ 646 disk->queue->limits.max_open_zones = max_open_zones; 647} 648 649static inline void disk_set_max_active_zones(struct gendisk *disk, 650 unsigned int max_active_zones) 651{ 652 disk->queue->limits.max_active_zones = max_active_zones; 653} 654 655static inline unsigned int bdev_max_open_zones(struct block_device *bdev) 656{ 657 return bdev->bd_disk->queue->limits.max_open_zones; 658} 659 660static inline unsigned int bdev_max_active_zones(struct block_device *bdev) 661{ 662 return bdev->bd_disk->queue->limits.max_active_zones; 663} 664 665#else /* CONFIG_BLK_DEV_ZONED */ 666static inline unsigned int bdev_nr_zones(struct block_device *bdev) 667{ 668 return 0; 669} 670 671static inline unsigned int disk_nr_zones(struct gendisk *disk) 672{ 673 return 0; 674} 675static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) 676{ 677 return false; 678} 679static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) 680{ 681 return 0; 682} 683static inline unsigned int bdev_max_open_zones(struct block_device *bdev) 684{ 685 return 0; 686} 687 688static inline unsigned int bdev_max_active_zones(struct block_device *bdev) 689{ 690 return 0; 691} 
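/*
 * Illustrative sketch (editorial example, not a kernel API): zone state is
 * typically inspected through blkdev_report_zones(), declared above, with a
 * report_zones_cb callback invoked once per reported zone.  The callback and
 * variable names below are hypothetical.
 *
 *	static int count_seq_zones_cb(struct blk_zone *zone, unsigned int idx,
 *				      void *data)
 *	{
 *		unsigned int *nr_seq = data;
 *
 *		if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
 *			(*nr_seq)++;
 *		return 0;
 *	}
 *
 *	unsigned int nr_seq = 0;
 *	int ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
 *				      count_seq_zones_cb, &nr_seq);
 */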
692#endif /* CONFIG_BLK_DEV_ZONED */ 693 694static inline unsigned int blk_queue_depth(struct request_queue *q) 695{ 696 if (q->queue_depth) 697 return q->queue_depth; 698 699 return q->nr_requests; 700} 701 702/* 703 * default timeout for SG_IO if none specified 704 */ 705#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) 706#define BLK_MIN_SG_TIMEOUT (7 * HZ) 707 708/* This should not be used directly - use rq_for_each_segment */ 709#define for_each_bio(_bio) \ 710 for (; _bio; _bio = _bio->bi_next) 711 712int __must_check device_add_disk(struct device *parent, struct gendisk *disk, 713 const struct attribute_group **groups); 714static inline int __must_check add_disk(struct gendisk *disk) 715{ 716 return device_add_disk(NULL, disk, NULL); 717} 718void del_gendisk(struct gendisk *gp); 719void invalidate_disk(struct gendisk *disk); 720void set_disk_ro(struct gendisk *disk, bool read_only); 721void disk_uevent(struct gendisk *disk, enum kobject_action action); 722 723static inline int get_disk_ro(struct gendisk *disk) 724{ 725 return disk->part0->bd_read_only || 726 test_bit(GD_READ_ONLY, &disk->state); 727} 728 729static inline int bdev_read_only(struct block_device *bdev) 730{ 731 return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); 732} 733 734bool set_capacity_and_notify(struct gendisk *disk, sector_t size); 735void disk_force_media_change(struct gendisk *disk); 736void bdev_mark_dead(struct block_device *bdev, bool surprise); 737 738void add_disk_randomness(struct gendisk *disk) __latent_entropy; 739void rand_initialize_disk(struct gendisk *disk); 740 741static inline sector_t get_start_sect(struct block_device *bdev) 742{ 743 return bdev->bd_start_sect; 744} 745 746static inline sector_t bdev_nr_sectors(struct block_device *bdev) 747{ 748 return bdev->bd_nr_sectors; 749} 750 751static inline loff_t bdev_nr_bytes(struct block_device *bdev) 752{ 753 return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; 754} 755 756static inline sector_t get_capacity(struct gendisk *disk) 757{ 758 return bdev_nr_sectors(disk->part0); 759} 760 761static inline u64 sb_bdev_nr_blocks(struct super_block *sb) 762{ 763 return bdev_nr_sectors(sb->s_bdev) >> 764 (sb->s_blocksize_bits - SECTOR_SHIFT); 765} 766 767int bdev_disk_changed(struct gendisk *disk, bool invalidate); 768 769void put_disk(struct gendisk *disk); 770struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node, 771 struct lock_class_key *lkclass); 772 773/** 774 * blk_alloc_disk - allocate a gendisk structure 775 * @lim: queue limits to be used for this disk. 776 * @node_id: numa node to allocate on 777 * 778 * Allocate and pre-initialize a gendisk structure for use with BIO based 779 * drivers. 780 * 781 * Returns an ERR_PTR on error, else the allocated disk. 
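 *
 * A minimal usage sketch (illustrative only; the limits shown and the error
 * handling are placeholders)::
 *
 *	struct queue_limits lim = {
 *		.logical_block_size	= 4096,
 *	};
 *	struct gendisk *disk;
 *
 *	disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
 *	if (IS_ERR(disk))
 *		return PTR_ERR(disk);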
782 * 783 * Context: can sleep 784 */ 785#define blk_alloc_disk(lim, node_id) \ 786({ \ 787 static struct lock_class_key __key; \ 788 \ 789 __blk_alloc_disk(lim, node_id, &__key); \ 790}) 791 792int __register_blkdev(unsigned int major, const char *name, 793 void (*probe)(dev_t devt)); 794#define register_blkdev(major, name) \ 795 __register_blkdev(major, name, NULL) 796void unregister_blkdev(unsigned int major, const char *name); 797 798bool disk_check_media_change(struct gendisk *disk); 799void set_capacity(struct gendisk *disk, sector_t size); 800 801#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED 802int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); 803void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); 804#else 805static inline int bd_link_disk_holder(struct block_device *bdev, 806 struct gendisk *disk) 807{ 808 return 0; 809} 810static inline void bd_unlink_disk_holder(struct block_device *bdev, 811 struct gendisk *disk) 812{ 813} 814#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ 815 816dev_t part_devt(struct gendisk *disk, u8 partno); 817void inc_diskseq(struct gendisk *disk); 818void blk_request_module(dev_t devt); 819 820extern int blk_register_queue(struct gendisk *disk); 821extern void blk_unregister_queue(struct gendisk *disk); 822void submit_bio_noacct(struct bio *bio); 823struct bio *bio_split_to_limits(struct bio *bio); 824 825extern int blk_lld_busy(struct request_queue *q); 826extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); 827extern void blk_queue_exit(struct request_queue *q); 828extern void blk_sync_queue(struct request_queue *q); 829 830/* Helper to convert REQ_OP_XXX to its string format XXX */ 831extern const char *blk_op_str(enum req_op op); 832 833int blk_status_to_errno(blk_status_t status); 834blk_status_t errno_to_blk_status(int errno); 835const char *blk_status_to_str(blk_status_t status); 836 837/* only poll the hardware once, don't continue until a completion was found */ 838#define BLK_POLL_ONESHOT (1 << 0) 839int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags); 840int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, 841 unsigned int flags); 842 843static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 844{ 845 return bdev->bd_queue; /* this is never NULL */ 846} 847 848/* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */ 849const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); 850 851static inline unsigned int bio_zone_no(struct bio *bio) 852{ 853 return disk_zone_no(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); 854} 855 856static inline unsigned int bio_zone_is_seq(struct bio *bio) 857{ 858 return disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); 859} 860 861/* 862 * Return how much of the chunk is left to be used for I/O at a given offset. 863 */ 864static inline unsigned int blk_chunk_sectors_left(sector_t offset, 865 unsigned int chunk_sectors) 866{ 867 if (unlikely(!is_power_of_2(chunk_sectors))) 868 return chunk_sectors - sector_div(offset, chunk_sectors); 869 return chunk_sectors - (offset & (chunk_sectors - 1)); 870} 871 872/** 873 * queue_limits_start_update - start an atomic update of queue limits 874 * @q: queue to update 875 * 876 * This functions starts an atomic update of the queue limits. It takes a lock 877 * to prevent other updates and returns a snapshot of the current limits that 878 * the caller can modify. 
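 *
 * An illustrative sketch of the usual pattern (the field and the
 * new_max_hw_sectors value below are hypothetical)::
 *
 *	struct queue_limits lim;
 *	int err;
 *
 *	lim = queue_limits_start_update(q);
 *	lim.max_hw_sectors = new_max_hw_sectors;
 *	err = queue_limits_commit_update(q, &lim);
 *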
The caller must call queue_limits_commit_update() 879 * to finish the update. 880 * 881 * Context: process context. The caller must have frozen the queue or ensured 882 * that there is outstanding I/O by other means. 883 */ 884static inline struct queue_limits 885queue_limits_start_update(struct request_queue *q) 886 __acquires(q->limits_lock) 887{ 888 mutex_lock(&q->limits_lock); 889 return q->limits; 890} 891int queue_limits_commit_update(struct request_queue *q, 892 struct queue_limits *lim); 893int queue_limits_set(struct request_queue *q, struct queue_limits *lim); 894 895/* 896 * Access functions for manipulating queue properties 897 */ 898void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce limit); 899extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); 900extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); 901extern void blk_queue_max_segments(struct request_queue *, unsigned short); 902extern void blk_queue_max_discard_segments(struct request_queue *, 903 unsigned short); 904void blk_queue_max_secure_erase_sectors(struct request_queue *q, 905 unsigned int max_sectors); 906extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); 907extern void blk_queue_max_discard_sectors(struct request_queue *q, 908 unsigned int max_discard_sectors); 909extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, 910 unsigned int max_write_same_sectors); 911extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); 912extern void blk_queue_max_zone_append_sectors(struct request_queue *q, 913 unsigned int max_zone_append_sectors); 914extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); 915void blk_queue_zone_write_granularity(struct request_queue *q, 916 unsigned int size); 917extern void blk_queue_alignment_offset(struct request_queue *q, 918 unsigned int alignment); 919void disk_update_readahead(struct gendisk *disk); 920extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); 921extern void blk_queue_io_min(struct request_queue *q, unsigned int min); 922extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); 923extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); 924extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); 925extern void blk_set_stacking_limits(struct queue_limits *lim); 926extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, 927 sector_t offset); 928void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, 929 sector_t offset, const char *pfx); 930extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); 931extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); 932extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); 933extern void blk_queue_dma_alignment(struct request_queue *, int); 934extern void blk_queue_update_dma_alignment(struct request_queue *, int); 935extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); 936extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); 937 938struct blk_independent_access_ranges * 939disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges); 940void disk_set_independent_access_ranges(struct gendisk *disk, 941 struct blk_independent_access_ranges *iars); 942 943/* 944 * Elevator features for blk_queue_required_elevator_features: 945 */ 946/* 
Supports zoned block devices sequential write constraint */ 947#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0) 948 949extern void blk_queue_required_elevator_features(struct request_queue *q, 950 unsigned int features); 951extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, 952 struct device *dev); 953 954bool __must_check blk_get_queue(struct request_queue *); 955extern void blk_put_queue(struct request_queue *); 956 957void blk_mark_disk_dead(struct gendisk *disk); 958 959#ifdef CONFIG_BLOCK 960/* 961 * blk_plug permits building a queue of related requests by holding the I/O 962 * fragments for a short period. This allows merging of sequential requests 963 * into single larger request. As the requests are moved from a per-task list to 964 * the device's request_queue in a batch, this results in improved scalability 965 * as the lock contention for request_queue lock is reduced. 966 * 967 * It is ok not to disable preemption when adding the request to the plug list 968 * or when attempting a merge. For details, please see schedule() where 969 * blk_flush_plug() is called. 970 */ 971struct blk_plug { 972 struct request *mq_list; /* blk-mq requests */ 973 974 /* if ios_left is > 1, we can batch tag/rq allocations */ 975 struct request *cached_rq; 976 u64 cur_ktime; 977 unsigned short nr_ios; 978 979 unsigned short rq_count; 980 981 bool multiple_queues; 982 bool has_elevator; 983 984 struct list_head cb_list; /* md requires an unplug callback */ 985}; 986 987struct blk_plug_cb; 988typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool); 989struct blk_plug_cb { 990 struct list_head list; 991 blk_plug_cb_fn callback; 992 void *data; 993}; 994extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, 995 void *data, int size); 996extern void blk_start_plug(struct blk_plug *); 997extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); 998extern void blk_finish_plug(struct blk_plug *); 999 1000void __blk_flush_plug(struct blk_plug *plug, bool from_schedule); 1001static inline void blk_flush_plug(struct blk_plug *plug, bool async) 1002{ 1003 if (plug) 1004 __blk_flush_plug(plug, async); 1005} 1006 1007/* 1008 * tsk == current here 1009 */ 1010static inline void blk_plug_invalidate_ts(struct task_struct *tsk) 1011{ 1012 struct blk_plug *plug = tsk->plug; 1013 1014 if (plug) 1015 plug->cur_ktime = 0; 1016 current->flags &= ~PF_BLOCK_TS; 1017} 1018 1019int blkdev_issue_flush(struct block_device *bdev); 1020long nr_blockdev_pages(void); 1021#else /* CONFIG_BLOCK */ 1022struct blk_plug { 1023}; 1024 1025static inline void blk_start_plug_nr_ios(struct blk_plug *plug, 1026 unsigned short nr_ios) 1027{ 1028} 1029 1030static inline void blk_start_plug(struct blk_plug *plug) 1031{ 1032} 1033 1034static inline void blk_finish_plug(struct blk_plug *plug) 1035{ 1036} 1037 1038static inline void blk_flush_plug(struct blk_plug *plug, bool async) 1039{ 1040} 1041 1042static inline void blk_plug_invalidate_ts(struct task_struct *tsk) 1043{ 1044} 1045 1046static inline int blkdev_issue_flush(struct block_device *bdev) 1047{ 1048 return 0; 1049} 1050 1051static inline long nr_blockdev_pages(void) 1052{ 1053 return 0; 1054} 1055#endif /* CONFIG_BLOCK */ 1056 1057extern void blk_io_schedule(void); 1058 1059int blkdev_issue_discard(struct block_device *bdev, sector_t sector, 1060 sector_t nr_sects, gfp_t gfp_mask); 1061int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, 1062 sector_t nr_sects, gfp_t gfp_mask, struct bio **biop); 1063int 
blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, 1064 sector_t nr_sects, gfp_t gfp); 1065 1066#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ 1067#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ 1068 1069extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 1070 sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, 1071 unsigned flags); 1072extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 1073 sector_t nr_sects, gfp_t gfp_mask, unsigned flags); 1074 1075static inline int sb_issue_discard(struct super_block *sb, sector_t block, 1076 sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) 1077{ 1078 return blkdev_issue_discard(sb->s_bdev, 1079 block << (sb->s_blocksize_bits - 1080 SECTOR_SHIFT), 1081 nr_blocks << (sb->s_blocksize_bits - 1082 SECTOR_SHIFT), 1083 gfp_mask); 1084} 1085static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, 1086 sector_t nr_blocks, gfp_t gfp_mask) 1087{ 1088 return blkdev_issue_zeroout(sb->s_bdev, 1089 block << (sb->s_blocksize_bits - 1090 SECTOR_SHIFT), 1091 nr_blocks << (sb->s_blocksize_bits - 1092 SECTOR_SHIFT), 1093 gfp_mask, 0); 1094} 1095 1096static inline bool bdev_is_partition(struct block_device *bdev) 1097{ 1098 return bdev->bd_partno; 1099} 1100 1101enum blk_default_limits { 1102 BLK_MAX_SEGMENTS = 128, 1103 BLK_SAFE_MAX_SECTORS = 255, 1104 BLK_MAX_SEGMENT_SIZE = 65536, 1105 BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, 1106}; 1107 1108/* 1109 * Default upper limit for the software max_sectors limit used for 1110 * regular file system I/O. This can be increased through sysfs. 1111 * 1112 * Not to be confused with the max_hw_sector limit that is entirely 1113 * controlled by the driver, usually based on hardware limits. 
1114 */ 1115#define BLK_DEF_MAX_SECTORS_CAP 2560u 1116 1117static inline unsigned long queue_segment_boundary(const struct request_queue *q) 1118{ 1119 return q->limits.seg_boundary_mask; 1120} 1121 1122static inline unsigned long queue_virt_boundary(const struct request_queue *q) 1123{ 1124 return q->limits.virt_boundary_mask; 1125} 1126 1127static inline unsigned int queue_max_sectors(const struct request_queue *q) 1128{ 1129 return q->limits.max_sectors; 1130} 1131 1132static inline unsigned int queue_max_bytes(struct request_queue *q) 1133{ 1134 return min_t(unsigned int, queue_max_sectors(q), INT_MAX >> 9) << 9; 1135} 1136 1137static inline unsigned int queue_max_hw_sectors(const struct request_queue *q) 1138{ 1139 return q->limits.max_hw_sectors; 1140} 1141 1142static inline unsigned short queue_max_segments(const struct request_queue *q) 1143{ 1144 return q->limits.max_segments; 1145} 1146 1147static inline unsigned short queue_max_discard_segments(const struct request_queue *q) 1148{ 1149 return q->limits.max_discard_segments; 1150} 1151 1152static inline unsigned int queue_max_segment_size(const struct request_queue *q) 1153{ 1154 return q->limits.max_segment_size; 1155} 1156 1157static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q) 1158{ 1159 1160 const struct queue_limits *l = &q->limits; 1161 1162 return min(l->max_zone_append_sectors, l->max_sectors); 1163} 1164 1165static inline unsigned int 1166bdev_max_zone_append_sectors(struct block_device *bdev) 1167{ 1168 return queue_max_zone_append_sectors(bdev_get_queue(bdev)); 1169} 1170 1171static inline unsigned int bdev_max_segments(struct block_device *bdev) 1172{ 1173 return queue_max_segments(bdev_get_queue(bdev)); 1174} 1175 1176static inline unsigned queue_logical_block_size(const struct request_queue *q) 1177{ 1178 int retval = 512; 1179 1180 if (q && q->limits.logical_block_size) 1181 retval = q->limits.logical_block_size; 1182 1183 return retval; 1184} 1185 1186static inline unsigned int bdev_logical_block_size(struct block_device *bdev) 1187{ 1188 return queue_logical_block_size(bdev_get_queue(bdev)); 1189} 1190 1191static inline unsigned int queue_physical_block_size(const struct request_queue *q) 1192{ 1193 return q->limits.physical_block_size; 1194} 1195 1196static inline unsigned int bdev_physical_block_size(struct block_device *bdev) 1197{ 1198 return queue_physical_block_size(bdev_get_queue(bdev)); 1199} 1200 1201static inline unsigned int queue_io_min(const struct request_queue *q) 1202{ 1203 return q->limits.io_min; 1204} 1205 1206static inline int bdev_io_min(struct block_device *bdev) 1207{ 1208 return queue_io_min(bdev_get_queue(bdev)); 1209} 1210 1211static inline unsigned int queue_io_opt(const struct request_queue *q) 1212{ 1213 return q->limits.io_opt; 1214} 1215 1216static inline int bdev_io_opt(struct block_device *bdev) 1217{ 1218 return queue_io_opt(bdev_get_queue(bdev)); 1219} 1220 1221static inline unsigned int 1222queue_zone_write_granularity(const struct request_queue *q) 1223{ 1224 return q->limits.zone_write_granularity; 1225} 1226 1227static inline unsigned int 1228bdev_zone_write_granularity(struct block_device *bdev) 1229{ 1230 return queue_zone_write_granularity(bdev_get_queue(bdev)); 1231} 1232 1233int bdev_alignment_offset(struct block_device *bdev); 1234unsigned int bdev_discard_alignment(struct block_device *bdev); 1235 1236static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev) 1237{ 1238 return 
bdev_get_queue(bdev)->limits.max_discard_sectors; 1239} 1240 1241static inline unsigned int bdev_discard_granularity(struct block_device *bdev) 1242{ 1243 return bdev_get_queue(bdev)->limits.discard_granularity; 1244} 1245 1246static inline unsigned int 1247bdev_max_secure_erase_sectors(struct block_device *bdev) 1248{ 1249 return bdev_get_queue(bdev)->limits.max_secure_erase_sectors; 1250} 1251 1252static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) 1253{ 1254 struct request_queue *q = bdev_get_queue(bdev); 1255 1256 if (q) 1257 return q->limits.max_write_zeroes_sectors; 1258 1259 return 0; 1260} 1261 1262static inline bool bdev_nonrot(struct block_device *bdev) 1263{ 1264 return blk_queue_nonrot(bdev_get_queue(bdev)); 1265} 1266 1267static inline bool bdev_synchronous(struct block_device *bdev) 1268{ 1269 return test_bit(QUEUE_FLAG_SYNCHRONOUS, 1270 &bdev_get_queue(bdev)->queue_flags); 1271} 1272 1273static inline bool bdev_stable_writes(struct block_device *bdev) 1274{ 1275 return test_bit(QUEUE_FLAG_STABLE_WRITES, 1276 &bdev_get_queue(bdev)->queue_flags); 1277} 1278 1279static inline bool bdev_write_cache(struct block_device *bdev) 1280{ 1281 return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags); 1282} 1283 1284static inline bool bdev_fua(struct block_device *bdev) 1285{ 1286 return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags); 1287} 1288 1289static inline bool bdev_nowait(struct block_device *bdev) 1290{ 1291 return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags); 1292} 1293 1294static inline bool bdev_is_zoned(struct block_device *bdev) 1295{ 1296 return blk_queue_is_zoned(bdev_get_queue(bdev)); 1297} 1298 1299static inline unsigned int bdev_zone_no(struct block_device *bdev, sector_t sec) 1300{ 1301 return disk_zone_no(bdev->bd_disk, sec); 1302} 1303 1304/* Whether write serialization is required for @op on zoned devices. */ 1305static inline bool op_needs_zoned_write_locking(enum req_op op) 1306{ 1307 return op == REQ_OP_WRITE || op == REQ_OP_WRITE_ZEROES; 1308} 1309 1310static inline bool bdev_op_is_zoned_write(struct block_device *bdev, 1311 enum req_op op) 1312{ 1313 return bdev_is_zoned(bdev) && op_needs_zoned_write_locking(op); 1314} 1315 1316static inline sector_t bdev_zone_sectors(struct block_device *bdev) 1317{ 1318 struct request_queue *q = bdev_get_queue(bdev); 1319 1320 if (!blk_queue_is_zoned(q)) 1321 return 0; 1322 return q->limits.chunk_sectors; 1323} 1324 1325static inline sector_t bdev_offset_from_zone_start(struct block_device *bdev, 1326 sector_t sector) 1327{ 1328 return sector & (bdev_zone_sectors(bdev) - 1); 1329} 1330 1331static inline bool bdev_is_zone_start(struct block_device *bdev, 1332 sector_t sector) 1333{ 1334 return bdev_offset_from_zone_start(bdev, sector) == 0; 1335} 1336 1337static inline int queue_dma_alignment(const struct request_queue *q) 1338{ 1339 return q ? 
q->limits.dma_alignment : 511; 1340} 1341 1342static inline unsigned int bdev_dma_alignment(struct block_device *bdev) 1343{ 1344 return queue_dma_alignment(bdev_get_queue(bdev)); 1345} 1346 1347static inline bool bdev_iter_is_aligned(struct block_device *bdev, 1348 struct iov_iter *iter) 1349{ 1350 return iov_iter_is_aligned(iter, bdev_dma_alignment(bdev), 1351 bdev_logical_block_size(bdev) - 1); 1352} 1353 1354static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, 1355 unsigned int len) 1356{ 1357 unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; 1358 return !(addr & alignment) && !(len & alignment); 1359} 1360 1361/* assumes size > 256 */ 1362static inline unsigned int blksize_bits(unsigned int size) 1363{ 1364 return order_base_2(size >> SECTOR_SHIFT) + SECTOR_SHIFT; 1365} 1366 1367static inline unsigned int block_size(struct block_device *bdev) 1368{ 1369 return 1 << bdev->bd_inode->i_blkbits; 1370} 1371 1372int kblockd_schedule_work(struct work_struct *work); 1373int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); 1374 1375#define MODULE_ALIAS_BLOCKDEV(major,minor) \ 1376 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) 1377#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ 1378 MODULE_ALIAS("block-major-" __stringify(major) "-*") 1379 1380#ifdef CONFIG_BLK_INLINE_ENCRYPTION 1381 1382bool blk_crypto_register(struct blk_crypto_profile *profile, 1383 struct request_queue *q); 1384 1385#else /* CONFIG_BLK_INLINE_ENCRYPTION */ 1386 1387static inline bool blk_crypto_register(struct blk_crypto_profile *profile, 1388 struct request_queue *q) 1389{ 1390 return true; 1391} 1392 1393#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ 1394 1395enum blk_unique_id { 1396 /* these match the Designator Types specified in SPC */ 1397 BLK_UID_T10 = 1, 1398 BLK_UID_EUI64 = 2, 1399 BLK_UID_NAA = 3, 1400}; 1401 1402struct block_device_operations { 1403 void (*submit_bio)(struct bio *bio); 1404 int (*poll_bio)(struct bio *bio, struct io_comp_batch *iob, 1405 unsigned int flags); 1406 int (*open)(struct gendisk *disk, blk_mode_t mode); 1407 void (*release)(struct gendisk *disk); 1408 int (*ioctl)(struct block_device *bdev, blk_mode_t mode, 1409 unsigned cmd, unsigned long arg); 1410 int (*compat_ioctl)(struct block_device *bdev, blk_mode_t mode, 1411 unsigned cmd, unsigned long arg); 1412 unsigned int (*check_events) (struct gendisk *disk, 1413 unsigned int clearing); 1414 void (*unlock_native_capacity) (struct gendisk *); 1415 int (*getgeo)(struct block_device *, struct hd_geometry *); 1416 int (*set_read_only)(struct block_device *bdev, bool ro); 1417 void (*free_disk)(struct gendisk *disk); 1418 /* this callback is with swap_lock and sometimes page table lock held */ 1419 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 1420 int (*report_zones)(struct gendisk *, sector_t sector, 1421 unsigned int nr_zones, report_zones_cb cb, void *data); 1422 char *(*devnode)(struct gendisk *disk, umode_t *mode); 1423 /* returns the length of the identifier or a negative errno: */ 1424 int (*get_unique_id)(struct gendisk *disk, u8 id[16], 1425 enum blk_unique_id id_type); 1426 struct module *owner; 1427 const struct pr_ops *pr_ops; 1428 1429 /* 1430 * Special callback for probing GPT entry at a given sector. 1431 * Needed by Android devices, used by GPT scanner and MMC blk 1432 * driver. 
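 *
 * More generally, a minimal bio-based driver only fills in a few of these
 * callbacks.  Illustrative sketch (the mydrv_* names are hypothetical):
 *
 *	static const struct block_device_operations mydrv_fops = {
 *		.owner		= THIS_MODULE,
 *		.submit_bio	= mydrv_submit_bio,
 *	};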
1433 */ 1434 int (*alternative_gpt_sector)(struct gendisk *disk, sector_t *sector); 1435}; 1436 1437#ifdef CONFIG_COMPAT 1438extern int blkdev_compat_ptr_ioctl(struct block_device *, blk_mode_t, 1439 unsigned int, unsigned long); 1440#else 1441#define blkdev_compat_ptr_ioctl NULL 1442#endif 1443 1444static inline void blk_wake_io_task(struct task_struct *waiter) 1445{ 1446 /* 1447 * If we're polling, the task itself is doing the completions. For 1448 * that case, we don't need to signal a wakeup, it's enough to just 1449 * mark us as RUNNING. 1450 */ 1451 if (waiter == current) 1452 __set_current_state(TASK_RUNNING); 1453 else 1454 wake_up_process(waiter); 1455} 1456 1457unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, 1458 unsigned long start_time); 1459void bdev_end_io_acct(struct block_device *bdev, enum req_op op, 1460 unsigned int sectors, unsigned long start_time); 1461 1462unsigned long bio_start_io_acct(struct bio *bio); 1463void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, 1464 struct block_device *orig_bdev); 1465 1466/** 1467 * bio_end_io_acct - end I/O accounting for bio based drivers 1468 * @bio: bio to end account for 1469 * @start_time: start time returned by bio_start_io_acct() 1470 */ 1471static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) 1472{ 1473 return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); 1474} 1475 1476int bdev_read_only(struct block_device *bdev); 1477int set_blocksize(struct block_device *bdev, int size); 1478 1479int lookup_bdev(const char *pathname, dev_t *dev); 1480 1481void blkdev_show(struct seq_file *seqf, off_t offset); 1482 1483#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ 1484#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ 1485#ifdef CONFIG_BLOCK 1486#define BLKDEV_MAJOR_MAX 512 1487#else 1488#define BLKDEV_MAJOR_MAX 0 1489#endif 1490 1491struct blk_holder_ops { 1492 void (*mark_dead)(struct block_device *bdev, bool surprise); 1493 1494 /* 1495 * Sync the file system mounted on the block device. 1496 */ 1497 void (*sync)(struct block_device *bdev); 1498 1499 /* 1500 * Freeze the file system mounted on the block device. 1501 */ 1502 int (*freeze)(struct block_device *bdev); 1503 1504 /* 1505 * Thaw the file system mounted on the block device. 1506 */ 1507 int (*thaw)(struct block_device *bdev); 1508 1509 /* 1510 * If needed, get a reference to the holder. 1511 */ 1512 void (*get_holder)(void *holder); 1513 1514 /* 1515 * Release the holder. 1516 */ 1517 void (*put_holder)(void *holder); 1518}; 1519 1520/* 1521 * For filesystems using @fs_holder_ops, the @holder argument passed to 1522 * helpers used to open and claim block devices via 1523 * bd_prepare_to_claim() must point to a superblock. 1524 */ 1525extern const struct blk_holder_ops fs_holder_ops; 1526 1527/* 1528 * Return the correct open flags for blkdev_get_by_* for super block flags 1529 * as stored in sb->s_flags. 1530 */ 1531#define sb_open_mode(flags) \ 1532 (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \ 1533 (((flags) & SB_RDONLY) ? 
0 : BLK_OPEN_WRITE)) 1534 1535struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, 1536 const struct blk_holder_ops *hops); 1537struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, 1538 void *holder, const struct blk_holder_ops *hops); 1539int bd_prepare_to_claim(struct block_device *bdev, void *holder, 1540 const struct blk_holder_ops *hops); 1541void bd_abort_claiming(struct block_device *bdev, void *holder); 1542 1543/* just for blk-cgroup, don't use elsewhere */ 1544struct block_device *blkdev_get_no_open(dev_t dev); 1545void blkdev_put_no_open(struct block_device *bdev); 1546 1547struct block_device *I_BDEV(struct inode *inode); 1548struct block_device *file_bdev(struct file *bdev_file); 1549 1550#ifdef CONFIG_BLOCK 1551void invalidate_bdev(struct block_device *bdev); 1552int sync_blockdev(struct block_device *bdev); 1553int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); 1554int sync_blockdev_nowait(struct block_device *bdev); 1555void sync_bdevs(bool wait); 1556void bdev_statx_dioalign(struct inode *inode, struct kstat *stat); 1557void printk_all_partitions(void); 1558int __init early_lookup_bdev(const char *pathname, dev_t *dev); 1559#else 1560static inline void invalidate_bdev(struct block_device *bdev) 1561{ 1562} 1563static inline int sync_blockdev(struct block_device *bdev) 1564{ 1565 return 0; 1566} 1567static inline int sync_blockdev_nowait(struct block_device *bdev) 1568{ 1569 return 0; 1570} 1571static inline void sync_bdevs(bool wait) 1572{ 1573} 1574static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) 1575{ 1576} 1577static inline void printk_all_partitions(void) 1578{ 1579} 1580static inline int early_lookup_bdev(const char *pathname, dev_t *dev) 1581{ 1582 return -EINVAL; 1583} 1584#endif /* CONFIG_BLOCK */ 1585 1586int bdev_freeze(struct block_device *bdev); 1587int bdev_thaw(struct block_device *bdev); 1588 1589struct io_comp_batch { 1590 struct request *req_list; 1591 bool need_ts; 1592 void (*complete)(struct io_comp_batch *); 1593}; 1594 1595#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } 1596 1597#endif /* _LINUX_BLKDEV_H */ 1598