1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
4 *
5 * Scatterlist handling helpers.
6 */
7#include <linux/export.h>
8#include <linux/slab.h>
9#include <linux/scatterlist.h>
10#include <linux/highmem.h>
11#include <linux/kmemleak.h>
12#include <linux/bvec.h>
13#include <linux/uio.h>
14
15/**
16 * sg_next - return the next scatterlist entry in a list
17 * @sg:		The current sg entry
18 *
19 * Description:
20 *   Usually the next entry will be @sg@ + 1, but if this sg element is part
21 *   of a chained scatterlist, it could jump to the start of a new
22 *   scatterlist array.
23 *
24 **/
25struct scatterlist *sg_next(struct scatterlist *sg)
26{
27	if (sg_is_last(sg))
28		return NULL;
29
30	sg++;
31	if (unlikely(sg_is_chain(sg)))
32		sg = sg_chain_ptr(sg);
33
34	return sg;
35}
36EXPORT_SYMBOL(sg_next);
37
/**
 * sg_nents - count the entries of a scatterlist
 * @sg:		The scatterlist
 *
 * Description:
 *   Walks the list with sg_next() until it runs out, so chained lists
 *   are followed as well.
 *
 * Returns:
 *   the number of entries visited (0 if @sg is %NULL)
 **/
int sg_nents(struct scatterlist *sg)
{
	int count = 0;

	while (sg) {
		count++;
		sg = sg_next(sg);
	}

	return count;
}
EXPORT_SYMBOL(sg_nents);
55
56/**
57 * sg_nents_for_len - return total count of entries in scatterlist
58 *                    needed to satisfy the supplied length
59 * @sg:		The scatterlist
60 * @len:	The total required length
61 *
62 * Description:
63 * Determines the number of entries in sg that are required to meet
64 * the supplied length, taking into account chaining as well
65 *
66 * Returns:
67 *   the number of sg entries needed, negative error on failure
68 *
69 **/
70int sg_nents_for_len(struct scatterlist *sg, u64 len)
71{
72	int nents;
73	u64 total;
74
75	if (!len)
76		return 0;
77
78	for (nents = 0, total = 0; sg; sg = sg_next(sg)) {
79		nents++;
80		total += sg->length;
81		if (total >= len)
82			return nents;
83	}
84
85	return -EINVAL;
86}
87EXPORT_SYMBOL(sg_nents_for_len);
88
89/**
90 * sg_last - return the last scatterlist entry in a list
91 * @sgl:	First entry in the scatterlist
92 * @nents:	Number of entries in the scatterlist
93 *
94 * Description:
95 *   Should only be used casually, it (currently) scans the entire list
96 *   to get the last entry.
97 *
98 *   Note that the @sgl@ pointer passed in need not be the first one,
99 *   the important bit is that @nents@ denotes the number of entries that
100 *   exist from @sgl@.
101 *
102 **/
103struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
104{
105	struct scatterlist *sg, *ret = NULL;
106	unsigned int i;
107
108	for_each_sg(sgl, sg, nents, i)
109		ret = sg;
110
111	BUG_ON(!sg_is_last(ret));
112	return ret;
113}
114EXPORT_SYMBOL(sg_last);
115
116/**
117 * sg_init_table - Initialize SG table
118 * @sgl:	   The SG table
119 * @nents:	   Number of entries in table
120 *
121 * Notes:
122 *   If this is part of a chained sg table, sg_mark_end() should be
123 *   used only on the last table part.
124 *
125 **/
126void sg_init_table(struct scatterlist *sgl, unsigned int nents)
127{
128	memset(sgl, 0, sizeof(*sgl) * nents);
129	sg_init_marker(sgl, nents);
130}
131EXPORT_SYMBOL(sg_init_table);
132
/**
 * sg_init_one - set up a one-entry sg list describing a buffer
 * @sg:		 SG entry
 * @buf:	 Virtual address for IO
 * @buflen:	 IO length
 *
 * Description:
 *   Initializes @sg as a table of exactly one entry and then points that
 *   entry at @buf / @buflen via sg_set_buf().
 *
 **/
void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	/* A single entry is simply a table of length one. */
	sg_init_table(sg, 1);

	sg_set_buf(sg, buf, buflen);
}
EXPORT_SYMBOL(sg_init_one);
146
147/*
148 * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
149 * helpers.
150 */
151static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
152{
153	if (nents == SG_MAX_SINGLE_ALLOC) {
154		/*
155		 * Kmemleak doesn't track page allocations as they are not
156		 * commonly used (in a raw form) for kernel data structures.
157		 * As we chain together a list of pages and then a normal
158		 * kmalloc (tracked by kmemleak), in order to for that last
159		 * allocation not to become decoupled (and thus a
160		 * false-positive) we need to inform kmemleak of all the
161		 * intermediate allocations.
162		 */
163		void *ptr = (void *) __get_free_page(gfp_mask);
164		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
165		return ptr;
166	} else
167		return kmalloc_array(nents, sizeof(struct scatterlist),
168				     gfp_mask);
169}
170
171static void sg_kfree(struct scatterlist *sg, unsigned int nents)
172{
173	if (nents == SG_MAX_SINGLE_ALLOC) {
174		kmemleak_free(sg);
175		free_page((unsigned long) sg);
176	} else
177		kfree(sg);
178}
179
/**
 * __sg_free_table - Free a previously mapped sg table
 * @table:	The sg table header to use
 * @max_ents:	The maximum number of entries per single scatterlist
 * @nents_first_chunk: Number of entries in the (preallocated) first
 * 	scatterlist chunk, 0 means no such preallocated first chunk
 * @free_fn:	Free function
 * @num_ents:	Number of entries in the table
 *
 *  Description:
 *    Free an sg table previously allocated and setup with
 *    __sg_alloc_table().  The @max_ents value must be identical to
 *    that previously used with __sg_alloc_table().
 *
 *    The table is walked chunk by chunk.  A preallocated first chunk
 *    (@nents_first_chunk != 0) is skipped rather than passed to @free_fn.
 *    Nothing is done if @table->sgl is already NULL; otherwise
 *    @table->sgl is NULL once the walk has finished.
 *
 **/
void __sg_free_table(struct sg_table *table, unsigned int max_ents,
		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
		     unsigned int num_ents)
{
	struct scatterlist *sgl, *next;
	/* The first chunk may have its own capacity; later ones use max_ents */
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;

	if (unlikely(!table->sgl))
		return;

	sgl = table->sgl;
	while (num_ents) {
		unsigned int alloc_size = num_ents;
		unsigned int sg_size;

		/*
		 * If we have more than max_ents segments left,
		 * then assign 'next' to the sg table after the current one.
		 * sg_size is then one less than alloc size, since the last
		 * element is the chain pointer.
		 */
		if (alloc_size > curr_max_ents) {
			/* Read the chain link before this chunk is freed */
			next = sg_chain_ptr(&sgl[curr_max_ents - 1]);
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else {
			sg_size = alloc_size;
			next = NULL;
		}

		num_ents -= sg_size;
		/*
		 * A non-zero nents_first_chunk marks the current chunk as the
		 * preallocated one: do not free it, just clear the marker so
		 * that all following chunks are handed to free_fn.
		 */
		if (nents_first_chunk)
			nents_first_chunk = 0;
		else
			free_fn(sgl, alloc_size);
		sgl = next;
		curr_max_ents = max_ents;
	}

	table->sgl = NULL;
}
EXPORT_SYMBOL(__sg_free_table);
237
238/**
239 * sg_free_append_table - Free a previously allocated append sg table.
240 * @table:	 The mapped sg append table header
241 *
242 **/
243void sg_free_append_table(struct sg_append_table *table)
244{
245	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
246			table->total_nents);
247}
248EXPORT_SYMBOL(sg_free_append_table);
249
250
251/**
252 * sg_free_table - Free a previously allocated sg table
253 * @table:	The mapped sg table header
254 *
255 **/
256void sg_free_table(struct sg_table *table)
257{
258	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
259			table->orig_nents);
260}
261EXPORT_SYMBOL(sg_free_table);
262
/**
 * __sg_alloc_table - Allocate and initialize an sg table with given allocator
 * @table:	The sg table header to use
 * @nents:	Number of entries in sg list
 * @max_ents:	The maximum number of entries the allocator returns per call
 * @first_chunk: first SGL if preallocated (may be %NULL)
 * @nents_first_chunk: Number of entries in the (preallocated) first
 * 	scatterlist chunk, 0 means no such preallocated chunk provided by user
 * @gfp_mask:	GFP allocation mask
 * @alloc_fn:	Allocator to use
 *
 * Description:
 *   This function returns a @table @nents long. The allocator is
 *   defined to return scatterlist chunks of maximum size @max_ents.
 *   Thus if @nents is bigger than @max_ents, the scatterlists will be
 *   chained in units of @max_ents.
 *
 * Returns:
 *   0 on success, -EINVAL if @nents is 0 (or, with
 *   CONFIG_ARCH_NO_SG_CHAIN, if @nents exceeds @max_ents), -ENOMEM if
 *   a chunk could not be obtained.
 *
 * Notes:
 *   If this function returns non-0 (eg failure), the caller must call
 *   __sg_free_table() to cleanup any leftover allocations.
 *
 **/
int __sg_alloc_table(struct sg_table *table, unsigned int nents,
		     unsigned int max_ents, struct scatterlist *first_chunk,
		     unsigned int nents_first_chunk, gfp_t gfp_mask,
		     sg_alloc_fn *alloc_fn)
{
	struct scatterlist *sg, *prv;
	unsigned int left;
	/* The first chunk may have its own capacity; later ones use max_ents */
	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
	/* Only read once prv is non-NULL, i.e. after the first iteration */
	unsigned prv_max_ents;

	memset(table, 0, sizeof(*table));

	if (nents == 0)
		return -EINVAL;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
	if (WARN_ON_ONCE(nents > max_ents))
		return -EINVAL;
#endif

	left = nents;
	prv = NULL;
	do {
		unsigned int sg_size, alloc_size = left;

		/*
		 * When the remainder does not fit into one chunk, the chunk
		 * is filled to capacity and its last slot is reserved for
		 * the chain link, so it holds one data entry less.
		 */
		if (alloc_size > curr_max_ents) {
			alloc_size = curr_max_ents;
			sg_size = alloc_size - 1;
		} else
			sg_size = alloc_size;

		left -= sg_size;

		/* Use the caller's preallocated chunk once, then allocate */
		if (first_chunk) {
			sg = first_chunk;
			first_chunk = NULL;
		} else {
			sg = alloc_fn(alloc_size, gfp_mask);
		}
		if (unlikely(!sg)) {
			/*
			 * Adjust entry count to reflect that the last
			 * entry of the previous table won't be used for
			 * linkage.  Without this, sg_kfree() may get
			 * confused.
			 */
			if (prv)
				table->nents = ++table->orig_nents;

			return -ENOMEM;
		}

		sg_init_table(sg, alloc_size);
		table->nents = table->orig_nents += sg_size;

		/*
		 * If this is the first mapping, assign the sg table header.
		 * If this is not the first mapping, chain previous part.
		 */
		if (prv)
			sg_chain(prv, prv_max_ents, sg);
		else
			table->sgl = sg;

		/*
		 * If no more entries after this one, mark the end
		 */
		if (!left)
			sg_mark_end(&sg[sg_size - 1]);

		prv = sg;
		prv_max_ents = curr_max_ents;
		curr_max_ents = max_ents;
	} while (left);

	return 0;
}
EXPORT_SYMBOL(__sg_alloc_table);
362
363/**
364 * sg_alloc_table - Allocate and initialize an sg table
365 * @table:	The sg table header to use
366 * @nents:	Number of entries in sg list
367 * @gfp_mask:	GFP allocation mask
368 *
369 *  Description:
370 *    Allocate and initialize an sg table. If @nents@ is larger than
371 *    SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
372 *
373 **/
374int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
375{
376	int ret;
377
378	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
379			       NULL, 0, gfp_mask, sg_kmalloc);
380	if (unlikely(ret))
381		sg_free_table(table);
382	return ret;
383}
384EXPORT_SYMBOL(sg_alloc_table);
385
/*
 * Return the scatterlist entry that should be filled next for an append
 * table, allocating and linking a new chunk when required.
 *
 * @table:       append table being extended
 * @cur:         entry filled last, or NULL if nothing has been filled yet
 * @needed_sges: number of entries still needed, including the one returned
 * @gfp_mask:    GFP allocation mask for a new chunk
 *
 * With @cur set, the entry after it is returned directly unless it is the
 * last entry and more than one entry is still needed; in that case it is
 * turned into the chain link to a freshly allocated chunk.
 *
 * Returns the entry to use, or ERR_PTR(-ENOMEM) if allocation failed.
 */
static struct scatterlist *get_next_sg(struct sg_append_table *table,
				       struct scatterlist *cur,
				       unsigned long needed_sges,
				       gfp_t gfp_mask)
{
	struct scatterlist *new_sg, *next_sg;
	unsigned int alloc_size;

	if (cur) {
		next_sg = sg_next(cur);
		/* Check if last entry should be kept for chaining */
		if (!sg_is_last(next_sg) || needed_sges == 1)
			return next_sg;
	}

	/* A single allocation never exceeds SG_MAX_SINGLE_ALLOC entries */
	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
	new_sg = sg_kmalloc(alloc_size, gfp_mask);
	if (!new_sg)
		return ERR_PTR(-ENOMEM);
	sg_init_table(new_sg, alloc_size);
	if (cur) {
		/*
		 * next_sg becomes the chain link, so the table grows by one
		 * entry less than what was allocated.
		 */
		table->total_nents += alloc_size - 1;
		__sg_chain(next_sg, new_sg);
	} else {
		/* First chunk: it becomes the head of the table */
		table->sgt.sgl = new_sg;
		table->total_nents = alloc_size;
	}
	return new_sg;
}
415
/*
 * Two pages can share one scatterlist entry when @a has the pfn directly
 * following that of @b and zone_device_pages_have_same_pgmap() accepts
 * the pair.
 */
static bool pages_are_mergeable(struct page *a, struct page *b)
{
	return page_to_pfn(a) == page_to_pfn(b) + 1 &&
	       zone_device_pages_have_same_pgmap(a, b);
}
424
/**
 * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
 *                                    table from an array of pages
 * @sgt_append:  The sg append table to use
 * @pages:       Pointer to an array of page pointers
 * @n_pages:     Number of pages in the pages array
 * @offset:      Offset from start of the first page to the start of a buffer
 * @size:        Number of valid bytes in the buffer (after offset)
 * @max_segment: Maximum size of a scatterlist element in bytes
 * @left_pages:  Number of pages the caller still has to add after this call;
 *               the list is only end-marked when this is 0
 * @gfp_mask:	 GFP allocation mask
 *
 * Description:
 *    On the first call this allocates and initializes an sg table from a
 *    list of pages; on later calls it reuses the scatterlist already held
 *    in @sgt_append. Contiguous ranges of the pages are squashed into a
 *    single scatterlist entry up to the maximum size specified in
 *    @max_segment.  A user may provide an offset at a start and a size of
 *    valid data in a buffer specified by the page array. The returned sg
 *    table is released by sg_free_append_table().
 *
 * Returns:
 *   0 on success, negative error on failure
 *
 * Notes:
 *   If this function returns non-0 (eg failure), the caller must call
 *   sg_free_append_table() to cleanup any leftover allocations.
 *
 *   In the first call, @sgt_append must be initialized.
 *
 *   When appending (@sgt_append->prv is set), @offset must be 0.
 */
int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
		struct page **pages, unsigned int n_pages, unsigned int offset,
		unsigned long size, unsigned int max_segment,
		unsigned int left_pages, gfp_t gfp_mask)
{
	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
	unsigned int added_nents = 0;
	struct scatterlist *s = sgt_append->prv;
	struct page *last_pg;

	/*
	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
	 * otherwise it can overshoot.
	 */
	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
	if (WARN_ON(max_segment < PAGE_SIZE))
		return -EINVAL;

	/* Appending to an existing list is refused when chaining is unavailable */
	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
		return -EOPNOTSUPP;

	if (sgt_append->prv) {
		/* pfn of the page that directly follows the previous entry */
		unsigned long next_pfn = (page_to_phys(sg_page(sgt_append->prv)) +
			sgt_append->prv->offset + sgt_append->prv->length) / PAGE_SIZE;

		if (WARN_ON(offset))
			return -EINVAL;

		/* Merge contiguous pages into the last SG */
		/* Remember the old length so it can be restored on failure */
		prv_len = sgt_append->prv->length;
		if (page_to_pfn(pages[0]) == next_pfn) {
			last_pg = pfn_to_page(next_pfn - 1);
			while (n_pages && pages_are_mergeable(pages[0], last_pg)) {
				if (sgt_append->prv->length + PAGE_SIZE > max_segment)
					break;
				sgt_append->prv->length += PAGE_SIZE;
				last_pg = pages[0];
				pages++;
				n_pages--;
			}
			/* Everything was absorbed by the previous entry */
			if (!n_pages)
				goto out;
		}
	}

	/* compute number of contiguous chunks */
	chunks = 1;
	seg_len = 0;
	for (i = 1; i < n_pages; i++) {
		seg_len += PAGE_SIZE;
		if (seg_len >= max_segment ||
		    !pages_are_mergeable(pages[i], pages[i - 1])) {
			chunks++;
			seg_len = 0;
		}
	}

	/* merging chunks and putting them into the scatterlist */
	cur_page = 0;
	for (i = 0; i < chunks; i++) {
		unsigned int j, chunk_size;

		/* look for the end of the current chunk */
		seg_len = 0;
		for (j = cur_page + 1; j < n_pages; j++) {
			seg_len += PAGE_SIZE;
			if (seg_len >= max_segment ||
			    !pages_are_mergeable(pages[j], pages[j - 1]))
				break;
		}

		/* Pass how many chunks might be left */
		s = get_next_sg(sgt_append, s, chunks - i + left_pages,
				gfp_mask);
		if (IS_ERR(s)) {
			/*
			 * Adjust entry length to be as before function was
			 * called.
			 */
			if (sgt_append->prv)
				sgt_append->prv->length = prv_len;
			return PTR_ERR(s);
		}
		/* Pages [cur_page, j) form this chunk; only the first has an offset */
		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
		sg_set_page(s, pages[cur_page],
			    min_t(unsigned long, size, chunk_size), offset);
		added_nents++;
		size -= chunk_size;
		offset = 0;
		cur_page = j;
	}
	sgt_append->sgt.nents += added_nents;
	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
	sgt_append->prv = s;
out:
	/* Only terminate the list when the caller has nothing more to add */
	if (!left_pages)
		sg_mark_end(s);
	return 0;
}
EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
554
555/**
556 * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
557 *                                     an array of pages and given maximum
558 *                                     segment.
559 * @sgt:	 The sg table header to use
560 * @pages:	 Pointer to an array of page pointers
561 * @n_pages:	 Number of pages in the pages array
562 * @offset:      Offset from start of the first page to the start of a buffer
563 * @size:        Number of valid bytes in the buffer (after offset)
564 * @max_segment: Maximum size of a scatterlist element in bytes
565 * @gfp_mask:	 GFP allocation mask
566 *
567 *  Description:
568 *    Allocate and initialize an sg table from a list of pages. Contiguous
569 *    ranges of the pages are squashed into a single scatterlist node up to the
570 *    maximum size specified in @max_segment. A user may provide an offset at a
571 *    start and a size of valid data in a buffer specified by the page array.
572 *
573 *    The returned sg table is released by sg_free_table.
574 *
575 *  Returns:
576 *   0 on success, negative error on failure
577 */
578int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
579				unsigned int n_pages, unsigned int offset,
580				unsigned long size, unsigned int max_segment,
581				gfp_t gfp_mask)
582{
583	struct sg_append_table append = {};
584	int err;
585
586	err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
587					       size, max_segment, 0, gfp_mask);
588	if (err) {
589		sg_free_append_table(&append);
590		return err;
591	}
592	memcpy(sgt, &append.sgt, sizeof(*sgt));
593	WARN_ON(append.total_nents != sgt->orig_nents);
594	return 0;
595}
596EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);
597
598#ifdef CONFIG_SGL_ALLOC
599
/**
 * sgl_alloc_order - allocate a scatterlist and its pages
 * @length: Length in bytes of the scatterlist. Must be at least one
 * @order: Second argument for alloc_pages()
 * @chainable: Whether or not to allocate an extra element in the scatterlist
 *	for scatterlist chaining purposes
 * @gfp: Memory allocation flags
 * @nent_p: [out] Number of entries in the scatterlist that have pages
 *	(may be %NULL)
 *
 * Each entry is backed by one alloc_pages(@gfp, @order) allocation; the
 * final entry may be shorter than the others. On page allocation failure
 * everything allocated so far is released via sgl_free_order().
 *
 * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
 */
struct scatterlist *sgl_alloc_order(unsigned long long length,
				    unsigned int order, bool chainable,
				    gfp_t gfp, unsigned int *nent_p)
{
	struct scatterlist *sgl, *sg;
	struct page *page;
	unsigned int nent, nalloc;
	u32 elem_len;

	/* Number of (PAGE_SIZE << order)-sized elements needed for length */
	nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order);
	/* Check for integer overflow */
	/*
	 * NOTE(review): nent is unsigned int, so the shift below is evaluated
	 * in that type; lengths whose rounded-up size does not fit are
	 * rejected here too - confirm this limit is intended.
	 */
	if (length > (nent << (PAGE_SHIFT + order)))
		return NULL;
	nalloc = nent;
	if (chainable) {
		/* Check for integer overflow */
		if (nalloc + 1 < nalloc)
			return NULL;
		nalloc++;
	}
	/* The scatterlist array itself is allocated without GFP_DMA */
	sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
			    gfp & ~GFP_DMA);
	if (!sgl)
		return NULL;

	sg_init_table(sgl, nalloc);
	sg = sgl;
	while (length) {
		elem_len = min_t(u64, length, PAGE_SIZE << order);
		page = alloc_pages(gfp, order);
		if (!page) {
			/*
			 * Entries not yet filled have a NULL page after
			 * sg_init_table(), so only allocated pages are freed.
			 */
			sgl_free_order(sgl, order);
			return NULL;
		}

		sg_set_page(sg, page, elem_len, 0);
		length -= elem_len;
		sg = sg_next(sg);
	}
	/* NOTE(review): length is always 0 once the loop above has exited */
	WARN_ONCE(length, "length = %lld\n", length);
	if (nent_p)
		*nent_p = nent;
	return sgl;
}
EXPORT_SYMBOL(sgl_alloc_order);
656
657/**
658 * sgl_alloc - allocate a scatterlist and its pages
659 * @length: Length in bytes of the scatterlist
660 * @gfp: Memory allocation flags
661 * @nent_p: [out] Number of entries in the scatterlist
662 *
663 * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
664 */
665struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
666			      unsigned int *nent_p)
667{
668	return sgl_alloc_order(length, 0, false, gfp, nent_p);
669}
670EXPORT_SYMBOL(sgl_alloc);
671
672/**
673 * sgl_free_n_order - free a scatterlist and its pages
674 * @sgl: Scatterlist with one or more elements
675 * @nents: Maximum number of elements to free
676 * @order: Second argument for __free_pages()
677 *
678 * Notes:
679 * - If several scatterlists have been chained and each chain element is
680 *   freed separately then it's essential to set nents correctly to avoid that a
681 *   page would get freed twice.
682 * - All pages in a chained scatterlist can be freed at once by setting @nents
683 *   to a high number.
684 */
685void sgl_free_n_order(struct scatterlist *sgl, int nents, int order)
686{
687	struct scatterlist *sg;
688	struct page *page;
689	int i;
690
691	for_each_sg(sgl, sg, nents, i) {
692		if (!sg)
693			break;
694		page = sg_page(sg);
695		if (page)
696			__free_pages(page, order);
697	}
698	kfree(sgl);
699}
700EXPORT_SYMBOL(sgl_free_n_order);
701
/**
 * sgl_free_order - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 * @order: Second argument for __free_pages()
 *
 * Calls sgl_free_n_order() with an entry limit of INT_MAX, i.e. the walk
 * is bounded by the end of the list rather than by a count.
 */
void sgl_free_order(struct scatterlist *sgl, int order)
{
	const int no_limit = INT_MAX;

	sgl_free_n_order(sgl, no_limit, order);
}
EXPORT_SYMBOL(sgl_free_order);
712
/**
 * sgl_free - free a scatterlist and its pages
 * @sgl: Scatterlist with one or more elements
 *
 * Same as sgl_free_order() with order 0.
 */
void sgl_free(struct scatterlist *sgl)
{
	const int order = 0;

	sgl_free_order(sgl, order);
}
EXPORT_SYMBOL(sgl_free);
722
723#endif /* CONFIG_SGL_ALLOC */
724
725void __sg_page_iter_start(struct sg_page_iter *piter,
726			  struct scatterlist *sglist, unsigned int nents,
727			  unsigned long pgoffset)
728{
729	piter->__pg_advance = 0;
730	piter->__nents = nents;
731
732	piter->sg = sglist;
733	piter->sg_pgoffset = pgoffset;
734}
735EXPORT_SYMBOL(__sg_page_iter_start);
736
737static int sg_page_count(struct scatterlist *sg)
738{
739	return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
740}
741
742bool __sg_page_iter_next(struct sg_page_iter *piter)
743{
744	if (!piter->__nents || !piter->sg)
745		return false;
746
747	piter->sg_pgoffset += piter->__pg_advance;
748	piter->__pg_advance = 1;
749
750	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
751		piter->sg_pgoffset -= sg_page_count(piter->sg);
752		piter->sg = sg_next(piter->sg);
753		if (!--piter->__nents || !piter->sg)
754			return false;
755	}
756
757	return true;
758}
759EXPORT_SYMBOL(__sg_page_iter_next);
760
761static int sg_dma_page_count(struct scatterlist *sg)
762{
763	return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
764}
765
766bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
767{
768	struct sg_page_iter *piter = &dma_iter->base;
769
770	if (!piter->__nents || !piter->sg)
771		return false;
772
773	piter->sg_pgoffset += piter->__pg_advance;
774	piter->__pg_advance = 1;
775
776	while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
777		piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
778		piter->sg = sg_next(piter->sg);
779		if (!--piter->__nents || !piter->sg)
780			return false;
781	}
782
783	return true;
784}
785EXPORT_SYMBOL(__sg_page_iter_dma_next);
786
787/**
788 * sg_miter_start - start mapping iteration over a sg list
789 * @miter: sg mapping iter to be started
790 * @sgl: sg list to iterate over
791 * @nents: number of sg entries
792 * @flags: sg iterator flags
793 *
794 * Description:
795 *   Starts mapping iterator @miter.
796 *
797 * Context:
798 *   Don't care.
799 */
800void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
801		    unsigned int nents, unsigned int flags)
802{
803	memset(miter, 0, sizeof(struct sg_mapping_iter));
804
805	__sg_page_iter_start(&miter->piter, sgl, nents, 0);
806	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
807	miter->__flags = flags;
808}
809EXPORT_SYMBOL(sg_miter_start);
810
/*
 * Make sure @miter refers to a page that still has bytes left.
 *
 * If bytes remain on the current page nothing changes.  Otherwise the page
 * iterator is advanced and __offset / __remaining are recomputed for the
 * new page.
 *
 * Returns false when the page iterator has no further page, true otherwise.
 */
static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
{
	if (!miter->__remaining) {
		struct scatterlist *sg;

		if (!__sg_page_iter_next(&miter->piter))
			return false;

		sg = miter->piter.sg;

		/* The entry's offset only applies while at page index 0 */
		miter->__offset = miter->piter.sg_pgoffset ? 0 : sg->offset;
		/* Fold whole pages contained in the offset into the page index */
		miter->piter.sg_pgoffset += miter->__offset >> PAGE_SHIFT;
		/* ... and keep only the in-page part */
		miter->__offset &= PAGE_SIZE - 1;
		/* Bytes from the current position to the end of the entry */
		miter->__remaining = sg->offset + sg->length -
				     (miter->piter.sg_pgoffset << PAGE_SHIFT) -
				     miter->__offset;
		/* Never reach beyond the end of the current page */
		miter->__remaining = min_t(unsigned long, miter->__remaining,
					   PAGE_SIZE - miter->__offset);
	}

	return true;
}
833
834/**
835 * sg_miter_skip - reposition mapping iterator
836 * @miter: sg mapping iter to be skipped
837 * @offset: number of bytes to plus the current location
838 *
839 * Description:
840 *   Sets the offset of @miter to its current location plus @offset bytes.
841 *   If mapping iterator @miter has been proceeded by sg_miter_next(), this
842 *   stops @miter.
843 *
844 * Context:
845 *   Don't care.
846 *
847 * Returns:
848 *   true if @miter contains the valid mapping.  false if end of sg
849 *   list is reached.
850 */
851bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset)
852{
853	sg_miter_stop(miter);
854
855	while (offset) {
856		off_t consumed;
857
858		if (!sg_miter_get_next_page(miter))
859			return false;
860
861		consumed = min_t(off_t, offset, miter->__remaining);
862		miter->__offset += consumed;
863		miter->__remaining -= consumed;
864		offset -= consumed;
865	}
866
867	return true;
868}
869EXPORT_SYMBOL(sg_miter_skip);
870
871/**
872 * sg_miter_next - proceed mapping iterator to the next mapping
873 * @miter: sg mapping iter to proceed
874 *
875 * Description:
876 *   Proceeds @miter to the next mapping.  @miter should have been started
877 *   using sg_miter_start().  On successful return, @miter->page,
878 *   @miter->addr and @miter->length point to the current mapping.
879 *
880 * Context:
881 *   May sleep if !SG_MITER_ATOMIC.
882 *
883 * Returns:
884 *   true if @miter contains the next mapping.  false if end of sg
885 *   list is reached.
886 */
887bool sg_miter_next(struct sg_mapping_iter *miter)
888{
889	sg_miter_stop(miter);
890
891	/*
892	 * Get to the next page if necessary.
893	 * __remaining, __offset is adjusted by sg_miter_stop
894	 */
895	if (!sg_miter_get_next_page(miter))
896		return false;
897
898	miter->page = sg_page_iter_page(&miter->piter);
899	miter->consumed = miter->length = miter->__remaining;
900
901	if (miter->__flags & SG_MITER_ATOMIC)
902		miter->addr = kmap_atomic(miter->page) + miter->__offset;
903	else
904		miter->addr = kmap(miter->page) + miter->__offset;
905
906	return true;
907}
908EXPORT_SYMBOL(sg_miter_next);
909
910/**
911 * sg_miter_stop - stop mapping iteration
912 * @miter: sg mapping iter to be stopped
913 *
914 * Description:
915 *   Stops mapping iterator @miter.  @miter should have been started
916 *   using sg_miter_start().  A stopped iteration can be resumed by
917 *   calling sg_miter_next() on it.  This is useful when resources (kmap)
918 *   need to be released during iteration.
919 *
920 * Context:
921 *   Don't care otherwise.
922 */
923void sg_miter_stop(struct sg_mapping_iter *miter)
924{
925	WARN_ON(miter->consumed > miter->length);
926
927	/* drop resources from the last iteration */
928	if (miter->addr) {
929		miter->__offset += miter->consumed;
930		miter->__remaining -= miter->consumed;
931
932		if (miter->__flags & SG_MITER_TO_SG)
933			flush_dcache_page(miter->page);
934
935		if (miter->__flags & SG_MITER_ATOMIC) {
936			WARN_ON_ONCE(!pagefault_disabled());
937			kunmap_atomic(miter->addr);
938		} else
939			kunmap(miter->page);
940
941		miter->page = NULL;
942		miter->addr = NULL;
943		miter->length = 0;
944		miter->consumed = 0;
945	}
946}
947EXPORT_SYMBOL(sg_miter_stop);
948
949/**
950 * sg_copy_buffer - Copy data between a linear buffer and an SG list
951 * @sgl:		 The SG list
952 * @nents:		 Number of SG entries
953 * @buf:		 Where to copy from
954 * @buflen:		 The number of bytes to copy
955 * @skip:		 Number of bytes to skip before copying
956 * @to_buffer:		 transfer direction (true == from an sg list to a
957 *			 buffer, false == from a buffer to an sg list)
958 *
959 * Returns the number of copied bytes.
960 *
961 **/
962size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
963		      size_t buflen, off_t skip, bool to_buffer)
964{
965	unsigned int offset = 0;
966	struct sg_mapping_iter miter;
967	unsigned int sg_flags = SG_MITER_ATOMIC;
968
969	if (to_buffer)
970		sg_flags |= SG_MITER_FROM_SG;
971	else
972		sg_flags |= SG_MITER_TO_SG;
973
974	sg_miter_start(&miter, sgl, nents, sg_flags);
975
976	if (!sg_miter_skip(&miter, skip))
977		return 0;
978
979	while ((offset < buflen) && sg_miter_next(&miter)) {
980		unsigned int len;
981
982		len = min(miter.length, buflen - offset);
983
984		if (to_buffer)
985			memcpy(buf + offset, miter.addr, len);
986		else
987			memcpy(miter.addr, buf + offset, len);
988
989		offset += len;
990	}
991
992	sg_miter_stop(&miter);
993
994	return offset;
995}
996EXPORT_SYMBOL(sg_copy_buffer);
997
/**
 * sg_copy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl:		 The SG list
 * @nents:		 Number of SG entries
 * @buf:		 Where to copy from
 * @buflen:		 The number of bytes to copy
 *
 * Wrapper around sg_copy_buffer() that writes into the SG list starting
 * at its very beginning (no bytes skipped).
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			   const void *buf, size_t buflen)
{
	/* sg_copy_buffer() takes a non-const pointer for both directions. */
	void *src = (void *)buf;

	return sg_copy_buffer(sgl, nents, src, buflen, 0, false);
}
EXPORT_SYMBOL(sg_copy_from_buffer);
1014
/**
 * sg_copy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl:		 The SG list
 * @nents:		 Number of SG entries
 * @buf:		 Where to copy to
 * @buflen:		 The number of bytes to copy
 *
 * Wrapper around sg_copy_buffer() that reads from the SG list starting
 * at its very beginning (no bytes skipped).
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			 void *buf, size_t buflen)
{
	const off_t no_skip = 0;

	return sg_copy_buffer(sgl, nents, buf, buflen, no_skip, true);
}
EXPORT_SYMBOL(sg_copy_to_buffer);
1031
/**
 * sg_pcopy_from_buffer - Copy from a linear buffer to an SG list
 * @sgl:		 The SG list
 * @nents:		 Number of SG entries
 * @buf:		 Where to copy from
 * @buflen:		 The number of bytes to copy
 * @skip:		 Number of bytes to skip before copying
 *
 * Wrapper around sg_copy_buffer() that writes into the SG list after
 * skipping its first @skip bytes.
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
			    const void *buf, size_t buflen, off_t skip)
{
	/* sg_copy_buffer() takes a non-const pointer for both directions. */
	void *src = (void *)buf;

	return sg_copy_buffer(sgl, nents, src, buflen, skip, false);
}
EXPORT_SYMBOL(sg_pcopy_from_buffer);
1049
/**
 * sg_pcopy_to_buffer - Copy from an SG list to a linear buffer
 * @sgl:		 The SG list
 * @nents:		 Number of SG entries
 * @buf:		 Where to copy to
 * @buflen:		 The number of bytes to copy
 * @skip:		 Number of bytes to skip before copying
 *
 * Wrapper around sg_copy_buffer() that reads from the SG list after
 * skipping its first @skip bytes.
 *
 * Returns the number of copied bytes.
 *
 **/
size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
			  void *buf, size_t buflen, off_t skip)
{
	const bool to_buffer = true;

	return sg_copy_buffer(sgl, nents, buf, buflen, skip, to_buffer);
}
EXPORT_SYMBOL(sg_pcopy_to_buffer);
1067
1068/**
1069 * sg_zero_buffer - Zero-out a part of a SG list
1070 * @sgl:		 The SG list
1071 * @nents:		 Number of SG entries
1072 * @buflen:		 The number of bytes to zero out
1073 * @skip:		 Number of bytes to skip before zeroing
1074 *
1075 * Returns the number of bytes zeroed.
1076 **/
1077size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
1078		       size_t buflen, off_t skip)
1079{
1080	unsigned int offset = 0;
1081	struct sg_mapping_iter miter;
1082	unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
1083
1084	sg_miter_start(&miter, sgl, nents, sg_flags);
1085
1086	if (!sg_miter_skip(&miter, skip))
1087		return false;
1088
1089	while (offset < buflen && sg_miter_next(&miter)) {
1090		unsigned int len;
1091
1092		len = min(miter.length, buflen - offset);
1093		memset(miter.addr, 0, len);
1094
1095		offset += len;
1096	}
1097
1098	sg_miter_stop(&miter);
1099	return offset;
1100}
1101EXPORT_SYMBOL(sg_zero_buffer);
1102
1103/*
1104 * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
1105 * iterators, and add them to the scatterlist.
1106 */
1107static ssize_t extract_user_to_sg(struct iov_iter *iter,
1108				  ssize_t maxsize,
1109				  struct sg_table *sgtable,
1110				  unsigned int sg_max,
1111				  iov_iter_extraction_t extraction_flags)
1112{
1113	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1114	struct page **pages;
1115	unsigned int npages;
1116	ssize_t ret = 0, res;
1117	size_t len, off;
1118
1119	/* We decant the page list into the tail of the scatterlist */
1120	pages = (void *)sgtable->sgl +
1121		array_size(sg_max, sizeof(struct scatterlist));
1122	pages -= sg_max;
1123
1124	do {
1125		res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
1126					     extraction_flags, &off);
1127		if (res < 0)
1128			goto failed;
1129
1130		len = res;
1131		maxsize -= len;
1132		ret += len;
1133		npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
1134		sg_max -= npages;
1135
1136		for (; npages > 0; npages--) {
1137			struct page *page = *pages;
1138			size_t seg = min_t(size_t, PAGE_SIZE - off, len);
1139
1140			*pages++ = NULL;
1141			sg_set_page(sg, page, seg, off);
1142			sgtable->nents++;
1143			sg++;
1144			len -= seg;
1145			off = 0;
1146		}
1147	} while (maxsize > 0 && sg_max > 0);
1148
1149	return ret;
1150
1151failed:
1152	while (sgtable->nents > sgtable->orig_nents)
1153		unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
1154	return res;
1155}
1156
1157/*
1158 * Extract up to sg_max pages from a BVEC-type iterator and add them to the
1159 * scatterlist.  The pages are not pinned.
1160 */
1161static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
1162				  ssize_t maxsize,
1163				  struct sg_table *sgtable,
1164				  unsigned int sg_max,
1165				  iov_iter_extraction_t extraction_flags)
1166{
1167	const struct bio_vec *bv = iter->bvec;
1168	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1169	unsigned long start = iter->iov_offset;
1170	unsigned int i;
1171	ssize_t ret = 0;
1172
1173	for (i = 0; i < iter->nr_segs; i++) {
1174		size_t off, len;
1175
1176		len = bv[i].bv_len;
1177		if (start >= len) {
1178			start -= len;
1179			continue;
1180		}
1181
1182		len = min_t(size_t, maxsize, len - start);
1183		off = bv[i].bv_offset + start;
1184
1185		sg_set_page(sg, bv[i].bv_page, len, off);
1186		sgtable->nents++;
1187		sg++;
1188		sg_max--;
1189
1190		ret += len;
1191		maxsize -= len;
1192		if (maxsize <= 0 || sg_max == 0)
1193			break;
1194		start = 0;
1195	}
1196
1197	if (ret > 0)
1198		iov_iter_advance(iter, ret);
1199	return ret;
1200}
1201
1202/*
1203 * Extract up to sg_max pages from a KVEC-type iterator and add them to the
1204 * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
1205 * static buffers.  The pages are not pinned.
1206 */
1207static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
1208				  ssize_t maxsize,
1209				  struct sg_table *sgtable,
1210				  unsigned int sg_max,
1211				  iov_iter_extraction_t extraction_flags)
1212{
1213	const struct kvec *kv = iter->kvec;
1214	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1215	unsigned long start = iter->iov_offset;
1216	unsigned int i;
1217	ssize_t ret = 0;
1218
1219	for (i = 0; i < iter->nr_segs; i++) {
1220		struct page *page;
1221		unsigned long kaddr;
1222		size_t off, len, seg;
1223
1224		len = kv[i].iov_len;
1225		if (start >= len) {
1226			start -= len;
1227			continue;
1228		}
1229
1230		kaddr = (unsigned long)kv[i].iov_base + start;
1231		off = kaddr & ~PAGE_MASK;
1232		len = min_t(size_t, maxsize, len - start);
1233		kaddr &= PAGE_MASK;
1234
1235		maxsize -= len;
1236		ret += len;
1237		do {
1238			seg = min_t(size_t, len, PAGE_SIZE - off);
1239			if (is_vmalloc_or_module_addr((void *)kaddr))
1240				page = vmalloc_to_page((void *)kaddr);
1241			else
1242				page = virt_to_page((void *)kaddr);
1243
1244			sg_set_page(sg, page, len, off);
1245			sgtable->nents++;
1246			sg++;
1247			sg_max--;
1248
1249			len -= seg;
1250			kaddr += PAGE_SIZE;
1251			off = 0;
1252		} while (len > 0 && sg_max > 0);
1253
1254		if (maxsize <= 0 || sg_max == 0)
1255			break;
1256		start = 0;
1257	}
1258
1259	if (ret > 0)
1260		iov_iter_advance(iter, ret);
1261	return ret;
1262}
1263
1264/*
1265 * Extract up to sg_max folios from an XARRAY-type iterator and add them to
1266 * the scatterlist.  The pages are not pinned.
1267 */
1268static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
1269				    ssize_t maxsize,
1270				    struct sg_table *sgtable,
1271				    unsigned int sg_max,
1272				    iov_iter_extraction_t extraction_flags)
1273{
1274	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
1275	struct xarray *xa = iter->xarray;
1276	struct folio *folio;
1277	loff_t start = iter->xarray_start + iter->iov_offset;
1278	pgoff_t index = start / PAGE_SIZE;
1279	ssize_t ret = 0;
1280	size_t offset, len;
1281	XA_STATE(xas, xa, index);
1282
1283	rcu_read_lock();
1284
1285	xas_for_each(&xas, folio, ULONG_MAX) {
1286		if (xas_retry(&xas, folio))
1287			continue;
1288		if (WARN_ON(xa_is_value(folio)))
1289			break;
1290		if (WARN_ON(folio_test_hugetlb(folio)))
1291			break;
1292
1293		offset = offset_in_folio(folio, start);
1294		len = min_t(size_t, maxsize, folio_size(folio) - offset);
1295
1296		sg_set_page(sg, folio_page(folio, 0), len, offset);
1297		sgtable->nents++;
1298		sg++;
1299		sg_max--;
1300
1301		maxsize -= len;
1302		ret += len;
1303		if (maxsize <= 0 || sg_max == 0)
1304			break;
1305	}
1306
1307	rcu_read_unlock();
1308	if (ret > 0)
1309		iov_iter_advance(iter, ret);
1310	return ret;
1311}
1312
1313/**
1314 * extract_iter_to_sg - Extract pages from an iterator and add to an sglist
1315 * @iter: The iterator to extract from
1316 * @maxsize: The amount of iterator to copy
1317 * @sgtable: The scatterlist table to fill in
1318 * @sg_max: Maximum number of elements in @sgtable that may be filled
1319 * @extraction_flags: Flags to qualify the request
1320 *
1321 * Extract the page fragments from the given amount of the source iterator and
1322 * add them to a scatterlist that refers to all of those bits, to a maximum
1323 * addition of @sg_max elements.
1324 *
1325 * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
1326 * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
1327 * and DISCARD-type are not supported.
1328 *
1329 * No end mark is placed on the scatterlist; that's left to the caller.
1330 *
1331 * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
1332 * be allowed on the pages extracted.
1333 *
1334 * If successful, @sgtable->nents is updated to include the number of elements
1335 * added and the number of bytes added is returned.  @sgtable->orig_nents is
1336 * left unaltered.
1337 *
1338 * The iov_iter_extract_mode() function should be used to query how cleanup
1339 * should be performed.
1340 */
1341ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
1342			   struct sg_table *sgtable, unsigned int sg_max,
1343			   iov_iter_extraction_t extraction_flags)
1344{
1345	if (maxsize == 0)
1346		return 0;
1347
1348	switch (iov_iter_type(iter)) {
1349	case ITER_UBUF:
1350	case ITER_IOVEC:
1351		return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
1352					  extraction_flags);
1353	case ITER_BVEC:
1354		return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
1355					  extraction_flags);
1356	case ITER_KVEC:
1357		return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
1358					  extraction_flags);
1359	case ITER_XARRAY:
1360		return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
1361					    extraction_flags);
1362	default:
1363		pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
1364		WARN_ON_ONCE(1);
1365		return -EIO;
1366	}
1367}
1368EXPORT_SYMBOL_GPL(extract_iter_to_sg);
1369