/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _NET_PAGE_POOL_TYPES_H
#define _NET_PAGE_POOL_TYPES_H

#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
#include <linux/types.h>

#define PP_FLAG_DMA_MAP		BIT(0) /* Should page_pool do the DMA
					* map/unmap
					*/
#define PP_FLAG_DMA_SYNC_DEV	BIT(1) /* If set all pages that the driver gets
					* from page_pool will be
					* DMA-synced-for-device according to
					* the length provided by the device
					* driver.
					* Please note DMA-sync-for-CPU is still
					* device driver responsibility
					*/
#define PP_FLAG_SYSTEM_POOL	BIT(2) /* Global system page_pool */
#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
				 PP_FLAG_SYSTEM_POOL)
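
/*
 * Illustrative sketch (not taken from any specific driver): the DMA flags
 * only make sense together with the matching page_pool_params fields
 * defined below.
 *
 *	.flags   = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 *	.dma_dir = DMA_FROM_DEVICE,	<- mapping direction for PP_FLAG_DMA_MAP
 *	.max_len = PAGE_SIZE,		<- max sync length for PP_FLAG_DMA_SYNC_DEV
 *	.offset  = 0,			<- sync start offset within the page
 */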

/*
 * Fast allocation side cache array/stack
 *
 * The cache size and refill watermark are related to the network
 * use-case.  The NAPI budget is 64 packets.  After a NAPI poll the RX
 * ring is usually refilled and the max consumed elements will be 64,
 * thus a natural max size of objects needed in the cache.
 *
 * Keeping room for more objects is due to the XDP_DROP use-case: as
 * XDP_DROP runs under the same softirq/NAPI protection, it can recycle
 * objects directly into this array.  If the cache were already full
 * (or partly full), those XDP_DROP recycles would have to take a
 * slower code path.
 */
#define PP_ALLOC_CACHE_SIZE	128
#define PP_ALLOC_CACHE_REFILL	64
struct pp_alloc_cache {
	u32 count;
	struct page *cache[PP_ALLOC_CACHE_SIZE];
};

/**
 * struct page_pool_params - page pool parameters
 * @flags:	PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL
 * @order:	2^order pages on allocation
 * @pool_size:	size of the ptr_ring
 * @nid:	NUMA node id to allocate pages from
 * @dev:	device, for DMA pre-mapping purposes
 * @netdev:	netdev this pool will serve (leave as NULL if none or multiple)
 * @napi:	NAPI which is the sole consumer of pages, otherwise NULL
 * @dma_dir:	DMA mapping direction
 * @max_len:	max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
 * @offset:	DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
 */
struct page_pool_params {
	struct_group_tagged(page_pool_params_fast, fast,
		unsigned int	flags;
		unsigned int	order;
		unsigned int	pool_size;
		int		nid;
		struct device	*dev;
		struct napi_struct *napi;
		enum dma_data_direction dma_dir;
		unsigned int	max_len;
		unsigned int	offset;
	);
	struct_group_tagged(page_pool_params_slow, slow,
		struct net_device *netdev;
/* private: used by test code only */
		void (*init_callback)(struct page *page, void *arg);
		void *init_arg;
	);
};
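
/*
 * Minimal creation sketch (hypothetical driver code; "rxq", "ring_size"
 * and "dev" are made-up names): each RX queue typically owns one pool,
 * sized to its RX ring.  page_pool_create() returns an ERR_PTR() on
 * failure.
 *
 *	struct page_pool_params pp_params = {
 *		.order		= 0,
 *		.pool_size	= ring_size,
 *		.nid		= NUMA_NO_NODE,
 *		.dev		= dev,
 *		.napi		= &rxq->napi,
 *		.dma_dir	= DMA_FROM_DEVICE,
 *		.flags		= PP_FLAG_DMA_MAP,
 *	};
 *	struct page_pool *pool;
 *
 *	pool = page_pool_create(&pp_params);
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 *	rxq->page_pool = pool;
 */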

#ifdef CONFIG_PAGE_POOL_STATS
/**
 * struct page_pool_alloc_stats - allocation statistics
 * @fast:	successful fast path allocations
 * @slow:	slow path order-0 allocations
 * @slow_high_order: slow path high order allocations
 * @empty:	ptr ring is empty, so a slow path allocation was forced
 * @refill:	an allocation which triggered a refill of the cache
 * @waive:	pages obtained from the ptr ring that cannot be added to
 *		the cache due to a NUMA mismatch
 */
struct page_pool_alloc_stats {
	u64 fast;
	u64 slow;
	u64 slow_high_order;
	u64 empty;
	u64 refill;
	u64 waive;
};

/**
 * struct page_pool_recycle_stats - recycling (freeing) statistics
 * @cached:	recycling placed page in the page pool cache
 * @cache_full:	page pool cache was full
 * @ring:	page placed into the ptr ring
 * @ring_full:	page released from page pool because the ptr ring was full
 * @released_refcnt:	page released (and not recycled) because refcnt > 1
 */
struct page_pool_recycle_stats {
	u64 cached;
	u64 cache_full;
	u64 ring;
	u64 ring_full;
	u64 released_refcnt;
};

/**
 * struct page_pool_stats - combined page pool use statistics
 * @alloc_stats:	see struct page_pool_alloc_stats
 * @recycle_stats:	see struct page_pool_recycle_stats
 *
 * Wrapper struct for combining page pool stats with different storage
 * requirements.
 */
struct page_pool_stats {
	struct page_pool_alloc_stats alloc_stats;
	struct page_pool_recycle_stats recycle_stats;
};
#endif
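
/*
 * Usage sketch: this assumes the page_pool_get_stats() helper, which
 * recent kernels declare alongside the other page_pool helpers rather
 * than in this header; "priv", "num_rx_queues" and "rxq" are
 * hypothetical driver names.  The helper adds each pool's counters into
 * the passed struct, so the loop ends with totals across RX queues.
 *
 *	struct page_pool_stats stats = { 0 };
 *	int i;
 *
 *	for (i = 0; i < priv->num_rx_queues; i++)
 *		page_pool_get_stats(priv->rxq[i].page_pool, &stats);
 */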

struct page_pool {
	struct page_pool_params_fast p;

	int cpuid;
	bool has_init_callback;

	long frag_users;
	struct page *frag_page;
	unsigned int frag_offset;
	u32 pages_state_hold_cnt;

	struct delayed_work release_dw;
	void (*disconnect)(void *pool);
	unsigned long defer_start;
	unsigned long defer_warn;

#ifdef CONFIG_PAGE_POOL_STATS
	/* these stats are incremented while in softirq context */
	struct page_pool_alloc_stats alloc_stats;
#endif
	u32 xdp_mem_id;

	/*
	 * Data structure for allocation side
	 *
	 * The driver's allocation side usually already performs some
	 * kind of resource protection.  Piggyback on this protection,
	 * and require the driver to protect the allocation side.
	 *
	 * For NIC drivers this means allocating a page_pool per
	 * RX-queue, as the RX-queue is already protected by
	 * softirq/BH scheduling and napi_schedule.  NAPI scheduling
	 * guarantees that a single napi_struct will only be scheduled
	 * on a single CPU (see napi_schedule).
	 */
	struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;

	/* Data structure for storing recycled pages.
	 *
	 * Returning/freeing pages is more complicated synchronization
	 * wise, because frees can happen on remote CPUs, with no
	 * association with the allocation resource.
	 *
	 * Use ptr_ring, as it separates consumer and producer
	 * efficiently, in a way that doesn't bounce cache-lines.
	 *
	 * TODO: Implement bulk return of pages into this structure.
	 */
	struct ptr_ring ring;

#ifdef CONFIG_PAGE_POOL_STATS
	/* recycle stats are per-cpu to avoid locking */
	struct page_pool_recycle_stats __percpu *recycle_stats;
#endif
	atomic_t pages_state_release_cnt;

	/* A page_pool is strictly tied to a single RX-queue being
	 * protected by NAPI, due to the above pp_alloc_cache.  This
	 * refcnt serves to simplify drivers' error handling.
	 */
	refcount_t user_cnt;

	u64 destroy_cnt;

	/* Slow/Control-path information follows */
	struct page_pool_params_slow slow;
	/* User-facing fields, protected by page_pools_lock */
	struct {
		struct hlist_node list;
		u64 detach_time;
		u32 napi_id;
		u32 id;
	} user;
};
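
/*
 * Ownership sketch (hypothetical driver structures; "my_rx_ring" and its
 * fields are made-up names): one pool per RX queue, created when the
 * queue is set up and destroyed together with it, once the queue has
 * stopped producing and consuming pages.
 *
 *	struct my_rx_ring {
 *		struct page_pool *page_pool;
 *		struct napi_struct napi;
 *	};
 *
 *	page_pool_destroy(rxq->page_pool);
 *	rxq->page_pool = NULL;
 */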

struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
				  unsigned int size, gfp_t gfp);
struct page_pool *page_pool_create(const struct page_pool_params *params);
struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
					  int cpuid);
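
/*
 * Allocation sketch (called from the pool's NAPI/RX context; error
 * handling trimmed, "rx_buf_len" is a made-up name): full pages for
 * page-per-packet RX, or sub-page fragments when several buffers share
 * one page.
 *
 *	struct page *page;
 *	unsigned int offset;
 *
 *	page = page_pool_alloc_pages(pool, GFP_ATOMIC | __GFP_NOWARN);
 *	if (!page)
 *		return -ENOMEM;
 *
 *	page = page_pool_alloc_frag(pool, &offset, rx_buf_len,
 *				    GFP_ATOMIC | __GFP_NOWARN);
 */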

struct xdp_mem_info;

#ifdef CONFIG_PAGE_POOL
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
			   struct xdp_mem_info *mem);
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
			     int count);
#else
static inline void page_pool_destroy(struct page_pool *pool)
{
}

static inline void page_pool_use_xdp_mem(struct page_pool *pool,
					 void (*disconnect)(void *),
					 struct xdp_mem_info *mem)
{
}

static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
					   int count)
{
}
#endif
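
/*
 * Bulk-return sketch (hypothetical completion path; "BULK" and
 * "fetch_completed_page()" are made-up names, and every page in the
 * array must belong to @pool): hand back a batch of pages in one call
 * instead of returning them one by one.
 *
 *	void *pages[BULK];
 *	int n = 0;
 *
 *	while (n < BULK && (page = fetch_completed_page(ring)))
 *		pages[n++] = page;
 *	if (n)
 *		page_pool_put_page_bulk(pool, pages, n);
 */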

void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
				unsigned int dma_sync_size,
				bool allow_direct);
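
/*
 * Recycle sketch: @allow_direct may only be true when the caller runs in
 * the pool's NAPI/softirq context, in which case the page can go straight
 * into pp_alloc_cache; from any other context pass false so the page
 * takes the ptr_ring path.  @dma_sync_size is the number of bytes the
 * device may have written, or -1 to sync up to pool->p.max_len.
 *
 *	In the RX NAPI poll, when dropping a frame ("pkt_len" hypothetical):
 *		page_pool_put_unrefed_page(pool, page, pkt_len, true);
 *
 *	From a completion path on another CPU:
 *		page_pool_put_unrefed_page(pool, page, -1, false);
 */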

static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
	return true;
#else
	return false;
#endif
}

/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid);
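
/*
 * Sketch (from within a safe context such as the pool's NAPI poll):
 * move future allocations to the local NUMA node after IRQ/NAPI
 * affinity has changed.
 *
 *	if (pool->p.nid != numa_mem_id())
 *		page_pool_update_nid(pool, numa_mem_id());
 */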

#endif /* _NET_PAGE_POOL_TYPES_H */