1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2018 Oracle.  All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#include <linux/module.h>
7#include <linux/compiler.h>
8#include <linux/fs.h>
9#include <linux/iomap.h>
10#include <linux/swap.h>
11
12/* Swapfile activation */
13
/*
 * State carried through one swap file activation: the extent currently being
 * accumulated plus the running totals that are reported back to the caller.
 */
struct iomap_swapfile_info {
	/* Mapping accumulated so far and not yet passed to add_swap_extent(). */
	struct iomap iomap;		/* accumulated iomap */
	/* Swap area being set up; its ->max caps the pages collected and its
	 * ->bdev is the only block device an extent may live on. */
	struct swap_info_struct *sis;
	/* The page holding file offset 0 is left out of lowest_ppage. */
	uint64_t lowest_ppage;		/* lowest physical addr seen (pages) */
	uint64_t highest_ppage;		/* highest physical addr seen (pages) */
	/* Also used as the starting page number for the next extent added. */
	unsigned long nr_pages;		/* number of pages collected */
	/* Sum of the non-negative values returned by add_swap_extent(). */
	int nr_extents;			/* extent count */
	/* Only used to print the file's path when activation is refused. */
	struct file *file;
};
23
24/*
25 * Collect physical extents for this swap file.  Physical extents reported to
26 * the swap code must be trimmed to align to a page boundary.  The logical
27 * offset within the file is irrelevant since the swapfile code maps logical
28 * page numbers of the swap device to the physical page-aligned extents.
29 */
30static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
31{
32	struct iomap *iomap = &isi->iomap;
33	unsigned long nr_pages;
34	unsigned long max_pages;
35	uint64_t first_ppage;
36	uint64_t first_ppage_reported;
37	uint64_t next_ppage;
38	int error;
39
40	if (unlikely(isi->nr_pages >= isi->sis->max))
41		return 0;
42	max_pages = isi->sis->max - isi->nr_pages;
43
44	/*
45	 * Round the start up and the end down so that the physical
46	 * extent aligns to a page boundary.
47	 */
48	first_ppage = ALIGN(iomap->addr, PAGE_SIZE) >> PAGE_SHIFT;
49	next_ppage = ALIGN_DOWN(iomap->addr + iomap->length, PAGE_SIZE) >>
50			PAGE_SHIFT;
51
52	/* Skip too-short physical extents. */
53	if (first_ppage >= next_ppage)
54		return 0;
55	nr_pages = next_ppage - first_ppage;
56	nr_pages = min(nr_pages, max_pages);
57
58	/*
59	 * Calculate how much swap space we're adding; the first page contains
60	 * the swap header and doesn't count.  The mm still wants that first
61	 * page fed to add_swap_extent, however.
62	 */
63	first_ppage_reported = first_ppage;
64	if (iomap->offset == 0)
65		first_ppage_reported++;
66	if (isi->lowest_ppage > first_ppage_reported)
67		isi->lowest_ppage = first_ppage_reported;
68	if (isi->highest_ppage < (next_ppage - 1))
69		isi->highest_ppage = next_ppage - 1;
70
71	/* Add extent, set up for the next call. */
72	error = add_swap_extent(isi->sis, isi->nr_pages, nr_pages, first_ppage);
73	if (error < 0)
74		return error;
75	isi->nr_extents += error;
76	isi->nr_pages += nr_pages;
77	return 0;
78}
79
80static int iomap_swapfile_fail(struct iomap_swapfile_info *isi, const char *str)
81{
82	char *buf, *p = ERR_PTR(-ENOMEM);
83
84	buf = kmalloc(PATH_MAX, GFP_KERNEL);
85	if (buf)
86		p = file_path(isi->file, buf, PATH_MAX);
87	pr_err("swapon: file %s %s\n", IS_ERR(p) ? "<unknown>" : p, str);
88	kfree(buf);
89	return -EINVAL;
90}
91
92/*
93 * Accumulate iomaps for this swap file.  We have to accumulate iomaps because
94 * swap only cares about contiguous page-aligned physical extents and makes no
95 * distinction between written and unwritten extents.
96 */
97static loff_t iomap_swapfile_iter(const struct iomap_iter *iter,
98		struct iomap *iomap, struct iomap_swapfile_info *isi)
99{
100	switch (iomap->type) {
101	case IOMAP_MAPPED:
102	case IOMAP_UNWRITTEN:
103		/* Only real or unwritten extents. */
104		break;
105	case IOMAP_INLINE:
106		/* No inline data. */
107		return iomap_swapfile_fail(isi, "is inline");
108	default:
109		return iomap_swapfile_fail(isi, "has unallocated extents");
110	}
111
112	/* No uncommitted metadata or shared blocks. */
113	if (iomap->flags & IOMAP_F_DIRTY)
114		return iomap_swapfile_fail(isi, "is not committed");
115	if (iomap->flags & IOMAP_F_SHARED)
116		return iomap_swapfile_fail(isi, "has shared extents");
117
118	/* Only one bdev per swap file. */
119	if (iomap->bdev != isi->sis->bdev)
120		return iomap_swapfile_fail(isi, "outside the main device");
121
122	if (isi->iomap.length == 0) {
123		/* No accumulated extent, so just store it. */
124		memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
125	} else if (isi->iomap.addr + isi->iomap.length == iomap->addr) {
126		/* Append this to the accumulated extent. */
127		isi->iomap.length += iomap->length;
128	} else {
129		/* Otherwise, add the retained iomap and store this one. */
130		int error = iomap_swapfile_add_extent(isi);
131		if (error)
132			return error;
133		memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
134	}
135	return iomap_length(iter);
136}
137
138/*
139 * Iterate a swap file's iomaps to construct physical extents that can be
140 * passed to the swapfile subsystem.
141 */
142int iomap_swapfile_activate(struct swap_info_struct *sis,
143		struct file *swap_file, sector_t *pagespan,
144		const struct iomap_ops *ops)
145{
146	struct inode *inode = swap_file->f_mapping->host;
147	struct iomap_iter iter = {
148		.inode	= inode,
149		.pos	= 0,
150		.len	= ALIGN_DOWN(i_size_read(inode), PAGE_SIZE),
151		.flags	= IOMAP_REPORT,
152	};
153	struct iomap_swapfile_info isi = {
154		.sis = sis,
155		.lowest_ppage = (sector_t)-1ULL,
156		.file = swap_file,
157	};
158	int ret;
159
160	/*
161	 * Persist all file mapping metadata so that we won't have any
162	 * IOMAP_F_DIRTY iomaps.
163	 */
164	ret = vfs_fsync(swap_file, 1);
165	if (ret)
166		return ret;
167
168	while ((ret = iomap_iter(&iter, ops)) > 0)
169		iter.processed = iomap_swapfile_iter(&iter, &iter.iomap, &isi);
170	if (ret < 0)
171		return ret;
172
173	if (isi.iomap.length) {
174		ret = iomap_swapfile_add_extent(&isi);
175		if (ret)
176			return ret;
177	}
178
179	/*
180	 * If this swapfile doesn't contain even a single page-aligned
181	 * contiguous range of blocks, reject this useless swapfile to
182	 * prevent confusion later on.
183	 */
184	if (isi.nr_pages == 0) {
185		pr_warn("swapon: Cannot find a single usable page in file.\n");
186		return -EINVAL;
187	}
188
189	*pagespan = 1 + isi.highest_ppage - isi.lowest_ppage;
190	sis->max = isi.nr_pages;
191	sis->pages = isi.nr_pages - 1;
192	sis->highest_bit = isi.nr_pages - 1;
193	return isi.nr_extents;
194}
195EXPORT_SYMBOL_GPL(iomap_swapfile_activate);
196