/*
 * fs/logfs/dev_bdev.c	- Device access methods for block devices
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 */
#include "logfs.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/gfp.h>

#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))

static void request_complete(struct bio *bio, int err)
{
	complete((struct completion *)bio->bi_private);
}

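/*
 * sync_request - issue a single-page bio and wait for it to complete.
 * The bio and bio_vec live on the stack; completion is signalled from
 * request_complete() via bio->bi_private.
 */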
static int sync_request(struct page *page, struct block_device *bdev, int rw)
{
	struct bio bio;
	struct bio_vec bio_vec;
	struct completion complete;

	bio_init(&bio);
	bio.bi_io_vec = &bio_vec;
	bio_vec.bv_page = page;
	bio_vec.bv_len = PAGE_SIZE;
	bio_vec.bv_offset = 0;
	bio.bi_vcnt = 1;
	bio.bi_idx = 0;
	bio.bi_size = PAGE_SIZE;
	bio.bi_bdev = bdev;
	bio.bi_sector = page->index * (PAGE_SIZE >> 9);
	init_completion(&complete);
	bio.bi_private = &complete;
	bio.bi_end_io = request_complete;

	submit_bio(rw, &bio);
	generic_unplug_device(bdev_get_queue(bdev));
	wait_for_completion(&complete);
	return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO;
}

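/*
 * bdev_readpage - read_cache_page() filler: read one page synchronously
 * and set the page flags accordingly before unlocking it.
 */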
static int bdev_readpage(void *_sb, struct page *page)
{
	struct super_block *sb = _sb;
	struct block_device *bdev = logfs_super(sb)->s_bdev;
	int err;

	err = sync_request(page, bdev, READ);
	if (err) {
		ClearPageUptodate(page);
		SetPageError(page);
	} else {
		SetPageUptodate(page);
		ClearPageError(page);
	}
	unlock_page(page);
	return err;
}

static DECLARE_WAIT_QUEUE_HEAD(wq);

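/*
 * writeseg_end_io - completion handler for segment writes.  Walks the
 * bio_vec array backwards, ends writeback and drops the page reference
 * taken in __bdev_writeseg(), then wakes bdev_sync() waiters once the
 * last pending write has finished.
 */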
static void writeseg_end_io(struct bio *bio, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct super_block *sb = bio->bi_private;
	struct logfs_super *super = logfs_super(sb);
	struct page *page;

	BUG_ON(!uptodate);
	BUG_ON(err);
	BUG_ON(bio->bi_vcnt == 0);
	do {
		page = bvec->bv_page;
		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		end_page_writeback(page);
		page_cache_release(page);
	} while (bvec >= bio->bi_io_vec);
	bio_put(bio);
	if (atomic_dec_and_test(&super->s_pending_writes))
		wake_up(&wq);
}

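/*
 * __bdev_writeseg - write nr_pages pages from the mapping inode to the
 * device at offset ofs.  Pages are added to a bio one by one; because
 * the block layer cannot split bios, a full bio is submitted and a new
 * one allocated whenever max_pages is reached.
 */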
static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
		size_t nr_pages)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	struct bio *bio;
	struct page *page;
	struct request_queue *q = bdev_get_queue(sb->s_bdev);
	unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
	int i;

	if (max_pages > BIO_MAX_PAGES)
		max_pages = BIO_MAX_PAGES;
	bio = bio_alloc(GFP_NOFS, max_pages);
	BUG_ON(!bio);

	for (i = 0; i < nr_pages; i++) {
		if (i >= max_pages) {
			/* Block layer cannot split bios :( */
			bio->bi_vcnt = i;
			bio->bi_idx = 0;
			bio->bi_size = i * PAGE_SIZE;
			bio->bi_bdev = super->s_bdev;
			bio->bi_sector = ofs >> 9;
			bio->bi_private = sb;
			bio->bi_end_io = writeseg_end_io;
			atomic_inc(&super->s_pending_writes);
			submit_bio(WRITE, bio);

			ofs += i * PAGE_SIZE;
			index += i;
			nr_pages -= i;
			i = 0;

			bio = bio_alloc(GFP_NOFS, max_pages);
			BUG_ON(!bio);
		}
		page = find_lock_page(mapping, index + i);
		BUG_ON(!page);
		bio->bi_io_vec[i].bv_page = page;
		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
		bio->bi_io_vec[i].bv_offset = 0;

		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
	}
	bio->bi_vcnt = nr_pages;
	bio->bi_idx = 0;
	bio->bi_size = nr_pages * PAGE_SIZE;
	bio->bi_bdev = super->s_bdev;
	bio->bi_sector = ofs >> 9;
	bio->bi_private = sb;
	bio->bi_end_io = writeseg_end_io;
	atomic_inc(&super->s_pending_writes);
	submit_bio(WRITE, bio);
	return 0;
}

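/*
 * bdev_writeseg - round the byte range [ofs, ofs + len) out to full
 * pages, hand it to __bdev_writeseg() and unplug the queue so the
 * writes are started immediately.
 */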
static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
{
	struct logfs_super *super = logfs_super(sb);
	int head;

	BUG_ON(super->s_flags & LOGFS_SB_FLAG_RO);

	if (len == 0) {
		/* This can happen when the object fit perfectly into a
		 * segment, the segment gets written per sync and subsequently
		 * closed.
		 */
		return;
	}
	head = ofs & (PAGE_SIZE - 1);
	if (head) {
		ofs -= head;
		len += head;
	}
	len = PAGE_ALIGN(len);
	__bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
	generic_unplug_device(bdev_get_queue(logfs_super(sb)->s_bdev));
}

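/*
 * erase_end_io - completion handler for erase writes.  No pages to
 * clean up here since every bio_vec points at the shared erase page.
 */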
static void erase_end_io(struct bio *bio, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct super_block *sb = bio->bi_private;
	struct logfs_super *super = logfs_super(sb);

	BUG_ON(!uptodate);
	BUG_ON(err);
	BUG_ON(bio->bi_vcnt == 0);
	bio_put(bio);
	if (atomic_dec_and_test(&super->s_pending_writes))
		wake_up(&wq);
}

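/*
 * do_erase - emulate an erase by writing nr_pages copies of the
 * pre-filled erase page.  Uses the same manual bio splitting as
 * __bdev_writeseg().
 */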
static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
		size_t nr_pages)
{
	struct logfs_super *super = logfs_super(sb);
	struct bio *bio;
	struct request_queue *q = bdev_get_queue(sb->s_bdev);
	unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
	int i;

	if (max_pages > BIO_MAX_PAGES)
		max_pages = BIO_MAX_PAGES;
	bio = bio_alloc(GFP_NOFS, max_pages);
	BUG_ON(!bio);

	for (i = 0; i < nr_pages; i++) {
		if (i >= max_pages) {
			/* Block layer cannot split bios :( */
			bio->bi_vcnt = i;
			bio->bi_idx = 0;
			bio->bi_size = i * PAGE_SIZE;
			bio->bi_bdev = super->s_bdev;
			bio->bi_sector = ofs >> 9;
			bio->bi_private = sb;
			bio->bi_end_io = erase_end_io;
			atomic_inc(&super->s_pending_writes);
			submit_bio(WRITE, bio);

			ofs += i * PAGE_SIZE;
			index += i;
			nr_pages -= i;
			i = 0;

			bio = bio_alloc(GFP_NOFS, max_pages);
			BUG_ON(!bio);
		}
		bio->bi_io_vec[i].bv_page = super->s_erase_page;
		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
		bio->bi_io_vec[i].bv_offset = 0;
	}
	bio->bi_vcnt = nr_pages;
	bio->bi_idx = 0;
	bio->bi_size = nr_pages * PAGE_SIZE;
	bio->bi_bdev = super->s_bdev;
	bio->bi_sector = ofs >> 9;
	bio->bi_private = sb;
	bio->bi_end_io = erase_end_io;
	atomic_inc(&super->s_pending_writes);
	submit_bio(WRITE, bio);
	return 0;
}

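/*
 * bdev_erase - erase a page-aligned range.  Block devices do not need
 * real erases, so the range is only overwritten when ensure_write is
 * set (i.e. for the journal).
 */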
static int bdev_erase(struct super_block *sb, loff_t to, size_t len,
		int ensure_write)
{
	struct logfs_super *super = logfs_super(sb);

	BUG_ON(to & (PAGE_SIZE - 1));
	BUG_ON(len & (PAGE_SIZE - 1));

	if (super->s_flags & LOGFS_SB_FLAG_RO)
		return -EROFS;

	if (ensure_write) {
		/*
		 * Object store doesn't care whether erases happen or not.
		 * But for the journal they are required.  Otherwise a scan
		 * can find an old commit entry and assume it is the current
		 * one, travelling back in time.
		 */
		do_erase(sb, to, to >> PAGE_SHIFT, len >> PAGE_SHIFT);
	}

	return 0;
}

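/*
 * bdev_sync - wait until all writes submitted through writeseg/erase
 * have completed.
 */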
static void bdev_sync(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);

	wait_event(wq, atomic_read(&super->s_pending_writes) == 0);
}

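/*
 * The first superblock sits at offset 0, the last one in the final
 * 4096-byte block of the device.  Both are read through the mapping
 * inode's page cache using bdev_readpage() as filler.
 */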
static struct page *bdev_find_first_sb(struct super_block *sb, u64 *ofs)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	filler_t *filler = bdev_readpage;

	*ofs = 0;
	return read_cache_page(mapping, 0, filler, sb);
}

static struct page *bdev_find_last_sb(struct super_block *sb, u64 *ofs)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	filler_t *filler = bdev_readpage;
	u64 pos = (super->s_bdev->bd_inode->i_size & ~0xfffULL) - 0x1000;
	pgoff_t index = pos >> PAGE_SHIFT;

	*ofs = pos;
	return read_cache_page(mapping, index, filler, sb);
}

static int bdev_write_sb(struct super_block *sb, struct page *page)
{
	struct block_device *bdev = logfs_super(sb)->s_bdev;

	/* Nothing special to do for block devices. */
	return sync_request(page, bdev, WRITE);
}

static void bdev_put_device(struct super_block *sb)
{
	close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE);
}

static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
{
	return 0;
}

static const struct logfs_device_ops bd_devops = {
	.find_first_sb	= bdev_find_first_sb,
	.find_last_sb	= bdev_find_last_sb,
	.write_sb	= bdev_write_sb,
	.readpage	= bdev_readpage,
	.writeseg	= bdev_writeseg,
	.erase		= bdev_erase,
	.can_write_buf	= bdev_can_write_buf,
	.sync		= bdev_sync,
	.put_device	= bdev_put_device,
};

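/*
 * logfs_get_sb_bdev - open the device exclusively and mount on it.
 * mtdblock devices are handed over to the MTD backend via
 * logfs_get_sb_mtd().
 */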
int logfs_get_sb_bdev(struct file_system_type *type, int flags,
		const char *devname, struct vfsmount *mnt)
{
	struct block_device *bdev;

	bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, type);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
		int mtdnr = MINOR(bdev->bd_dev);
		close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
		return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
	}

	return logfs_get_sb_device(type, flags, NULL, bdev, &bd_devops, mnt);
}