1/* SPDX-License-Identifier: GPL-2.0 */
2
3#ifndef _KERNEL_PRINTK_RINGBUFFER_H
4#define _KERNEL_PRINTK_RINGBUFFER_H
5
6#include <linux/atomic.h>
7#include <linux/dev_printk.h>
8
9/*
10 * Meta information about each stored message.
11 *
12 * All fields are set by the printk code except for @seq, which is
13 * set by the ringbuffer code.
14 */
struct printk_info {
	u64	seq;		/* sequence number */
	u64	ts_nsec;	/* timestamp in nanoseconds */
	u16	text_len;	/* length of text message */
	u8	facility;	/* syslog facility */
	/* @flags and @level are bitfields that together use 8 bits. */
	u8	flags:5;	/* internal record flags */
	u8	level:3;	/* syslog level */
	u32	caller_id;	/* thread id or processor id */

	/*
	 * Device information for the record.
	 * NOTE(review): the type is presumably declared in
	 * <linux/dev_printk.h>, which this header includes - confirm.
	 */
	struct dev_printk_info	dev_info;
};
26
27/*
28 * A structure providing the buffers, used by writers and readers.
29 *
30 * Writers:
31 * Using prb_rec_init_wr(), a writer sets @text_buf_size before calling
32 * prb_reserve(). On success, prb_reserve() sets @info and @text_buf to
33 * buffers reserved for that writer.
34 *
35 * Readers:
36 * Using prb_rec_init_rd(), a reader sets all fields before calling
 * prb_read_valid(). Note that the reader provides the @info and @text_buf
 * buffers. On success, the struct pointed to by @info will be filled and
39 * the char array pointed to by @text_buf will be filled with text data.
40 */
struct printk_record {
	/*
	 * Meta-data buffer. Writers get it from prb_reserve(); readers
	 * supply it themselves (see prb_rec_init_rd()).
	 */
	struct printk_info	*info;
	/* Text buffer. Same ownership rules as @info. */
	char			*text_buf;
	/*
	 * Writers: the text size requested, set before prb_reserve().
	 * Readers: the size of the buffer that @text_buf points to.
	 */
	unsigned int		text_buf_size;
};
46
47/* Specifies the logical position and span of a data block. */
struct prb_data_blk_lpos {
	/*
	 * Both fields are logical positions. For a record without a data
	 * block they hold one of the special values FAILED_LPOS or
	 * EMPTY_LINE_LPOS instead of a real position.
	 */
	unsigned long	begin;	/* where the data block begins */
	/* NOTE(review): presumably the lpos just past the block - confirm */
	unsigned long	next;
};
52
53/*
54 * A descriptor: the complete meta-data for a record.
55 *
56 * @state_var: A bitwise combination of descriptor ID and descriptor state.
57 */
struct prb_desc {
	/*
	 * The top two bits carry the state (see DESC_STATE() and
	 * DESC_FLAGS_SHIFT), the remaining bits carry the ID (see DESC_ID()).
	 */
	atomic_long_t			state_var;
	/* Position of the text data block belonging to this descriptor. */
	struct prb_data_blk_lpos	text_blk_lpos;
};
62
63/* A ringbuffer of "ID + data" elements. */
struct prb_data_ring {
	/*
	 * Size of @data as a power-of-2 exponent. _DEFINE_PRINTKRB() sets it
	 * to avgtextbits + descbits.
	 */
	unsigned int	size_bits;
	/* The data buffer itself. */
	char		*data;
	/*
	 * Logical positions of the head and the tail of the ring. Both are
	 * statically initialized to BLK0_LPOS(size_bits).
	 */
	atomic_long_t	head_lpos;
	atomic_long_t	tail_lpos;
};
70
71/* A ringbuffer of "struct prb_desc" elements. */
struct prb_desc_ring {
	/* Number of descriptors as a power-of-2 exponent (see _DESCS_COUNT()). */
	unsigned int		count_bits;
	/* Descriptor array. */
	struct prb_desc		*descs;
	/*
	 * Meta-data array. _DEFINE_PRINTKRB() allocates it with the same
	 * number of elements as @descs.
	 */
	struct printk_info	*infos;
	/*
	 * IDs of the head and the tail descriptor. Both are statically
	 * initialized to DESC0_ID(count_bits).
	 */
	atomic_long_t		head_id;
	atomic_long_t		tail_id;
	/*
	 * Statically initialized to 0.
	 * NOTE(review): presumably the sequence number of the most recently
	 * finalized record - confirm against the implementation.
	 */
	atomic_long_t		last_finalized_seq;
};
80
81/*
82 * The high level structure representing the printk ringbuffer.
83 *
84 * @fail: Count of failed prb_reserve() calls where not even a data-less
85 *        record was created.
86 */
struct printk_ringbuffer {
	/* Ring of descriptors and their meta-data. */
	struct prb_desc_ring	desc_ring;
	/* Ring holding the text of the records. */
	struct prb_data_ring	text_data_ring;
	/* Failed prb_reserve() calls; statically initialized to 0. */
	atomic_long_t		fail;
};
92
93/*
94 * Used by writers as a reserve/commit handle.
95 *
96 * @rb:         Ringbuffer where the entry is reserved.
97 * @irqflags:   Saved irq flags to restore on entry commit.
98 * @id:         ID of the reserved descriptor.
99 * @text_space: Total occupied buffer space in the text data ring, including
100 *              ID, alignment padding, and wrapping data blocks.
101 *
102 * This structure is an opaque handle for writers. Its contents are only
103 * to be used by the ringbuffer implementation.
104 */
struct prb_reserved_entry {
	/* Ringbuffer where the entry is reserved. */
	struct printk_ringbuffer	*rb;
	/* Saved irq flags, restored on entry commit. */
	unsigned long			irqflags;
	/* ID of the reserved descriptor. */
	unsigned long			id;
	/*
	 * Total occupied space in the text data ring.
	 * NOTE(review): presumably what prb_record_text_space() reports -
	 * confirm against the implementation.
	 */
	unsigned int			text_space;
};
111
112/* The possible responses of a descriptor state-query. */
enum desc_state {
	/* Not one of the 2-bit values 0x0-0x3 that DESC_STATE() can extract. */
	desc_miss	=  -1,	/* ID mismatch (pseudo state) */
	/* The values below are the ones stored in the state bits (see DESC_SV()). */
	desc_reserved	= 0x0,	/* reserved, in use by writer */
	desc_committed	= 0x1,	/* committed by writer, could get reopened */
	desc_finalized	= 0x2,	/* committed, no further modification allowed */
	desc_reusable	= 0x3,	/* free, not yet used by any writer */
};
120
/*
 * Sizing and state-variable helpers.
 *
 * _DATA_SIZE() and _DESCS_COUNT() convert a power-of-2 exponent into a byte
 * size (unsigned long) and a descriptor count (unsigned int), respectively.
 *
 * A descriptor state variable is one unsigned long: the two most significant
 * bits hold the state, all lower bits hold the ID.
 *
 *   DESC_STATE(sv)      - extract the 2-bit state from @sv
 *   DESC_ID(sv)         - extract the ID from @sv
 *   DESC_SV(id, state)  - combine @id and @state into a state variable
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "a | b") are evaluated as a whole.
 */
#define _DATA_SIZE(sz_bits)	(1UL << (sz_bits))
#define _DESCS_COUNT(ct_bits)	(1U << (ct_bits))
#define DESC_SV_BITS		(sizeof(unsigned long) * 8)
#define DESC_FLAGS_SHIFT	(DESC_SV_BITS - 2)
#define DESC_FLAGS_MASK		(3UL << DESC_FLAGS_SHIFT)
#define DESC_STATE(sv)		(3UL & ((sv) >> DESC_FLAGS_SHIFT))
#define DESC_SV(id, state)	(((unsigned long)(state) << DESC_FLAGS_SHIFT) | (id))
#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
#define DESC_ID(sv)		((sv) & DESC_ID_MASK)
130
131/*
132 * Special data block logical position values (for fields of
133 * @prb_desc.text_blk_lpos).
134 *
135 * - Bit0 is used to identify if the record has no data block. (Implemented in
136 *   the LPOS_DATALESS() macro.)
137 *
138 * - Bit1 specifies the reason for not having a data block.
139 *
140 * These special values could never be real lpos values because of the
141 * meta data and alignment padding of data blocks. (See to_blk_size() for
142 * details.)
143 */
/* Bit0 set, Bit1 clear. */
#define FAILED_LPOS		0x1
/* Bit0 and Bit1 set. */
#define EMPTY_LINE_LPOS		0x3

/*
 * Initializer for a struct prb_data_blk_lpos with both fields set to
 * FAILED_LPOS.
 */
#define FAILED_BLK_LPOS	\
{				\
	.begin	= FAILED_LPOS,	\
	.next	= FAILED_LPOS,	\
}
152
153/*
154 * Descriptor Bootstrap
155 *
156 * The descriptor array is minimally initialized to allow immediate usage
157 * by readers and writers. The requirements that the descriptor array
158 * initialization must satisfy:
159 *
160 *   Req1
161 *     The tail must point to an existing (committed or reusable) descriptor.
162 *     This is required by the implementation of prb_first_seq().
163 *
164 *   Req2
165 *     Readers must see that the ringbuffer is initially empty.
166 *
167 *   Req3
168 *     The first record reserved by a writer is assigned sequence number 0.
169 *
170 * To satisfy Req1, the tail initially points to a descriptor that is
171 * minimally initialized (having no data block, i.e. data-less with the
172 * data block's lpos @begin and @next values set to FAILED_LPOS).
173 *
174 * To satisfy Req2, the initial tail descriptor is initialized to the
175 * reusable state. Readers recognize reusable descriptors as existing
176 * records, but skip over them.
177 *
178 * To satisfy Req3, the last descriptor in the array is used as the initial
179 * head (and tail) descriptor. This allows the first record reserved by a
180 * writer (head + 1) to be the first descriptor in the array. (Only the first
181 * descriptor in the array could have a valid sequence number of 0.)
182 *
183 * The first time a descriptor is reserved, it is assigned a sequence number
184 * with the value of the array index. A "first time reserved" descriptor can
185 * be recognized because it has a sequence number of 0 but does not have an
186 * index of 0. (Only the first descriptor in the array could have a valid
187 * sequence number of 0.) After the first reservation, all future reservations
188 * (recycling) simply involve incrementing the sequence number by the array
189 * count.
190 *
191 *   Hack #1
192 *     Only the first descriptor in the array is allowed to have the sequence
193 *     number 0. In this case it is not possible to recognize if it is being
194 *     reserved the first time (set to index value) or has been reserved
195 *     previously (increment by the array count). This is handled by _always_
196 *     incrementing the sequence number by the array count when reserving the
197 *     first descriptor in the array. In order to satisfy Req3, the sequence
198 *     number of the first descriptor in the array is initialized to minus
199 *     the array count. Then, upon the first reservation, it is incremented
200 *     to 0, thus satisfying Req3.
201 *
202 *   Hack #2
203 *     prb_first_seq() can be called at any time by readers to retrieve the
204 *     sequence number of the tail descriptor. However, due to Req2 and Req3,
205 *     initially there are no records to report the sequence number of
206 *     (sequence numbers are u64 and there is nothing less than 0). To handle
207 *     this, the sequence number of the initial tail descriptor is initialized
208 *     to 0. Technically this is incorrect, because there is no record with
209 *     sequence number 0 (yet) and the tail descriptor is not the first
210 *     descriptor in the array. But it allows prb_read_valid() to correctly
211 *     report the existence of a record for _any_ given sequence number at all
212 *     times. Bootstrapping is complete when the tail is pushed the first
213 *     time, thus finally pointing to the first descriptor reserved by a
214 *     writer, which has the assigned sequence number 0.
215 */
216
217/*
218 * Initiating Logical Value Overflows
219 *
220 * Both logical position (lpos) and ID values can be mapped to array indexes
221 * but may experience overflows during the lifetime of the system. To ensure
222 * that printk_ringbuffer can handle the overflows for these types, initial
223 * values are chosen that map to the correct initial array indexes, but will
224 * result in overflows soon.
225 *
226 *   BLK0_LPOS
227 *     The initial @head_lpos and @tail_lpos for data rings. It is at index
228 *     0 and the lpos value is such that it will overflow on the first wrap.
229 *
230 *   DESC0_ID
231 *     The initial @head_id and @tail_id for the desc ring. It is at the last
232 *     index of the descriptor array (see Req3 above) and the ID value is such
233 *     that it will overflow on the second wrap.
234 */
/* The negated data size, as an unsigned long (see _DATA_SIZE()). */
#define BLK0_LPOS(sz_bits)	(-(_DATA_SIZE(sz_bits)))
/*
 * The negated (descriptor count + 1), reduced to the ID bits by DESC_ID().
 * Note that _DESCS_COUNT() yields an unsigned int, so the negation is
 * performed at unsigned int width before DESC_ID() applies its
 * unsigned long mask.
 */
#define DESC0_ID(ct_bits)	DESC_ID(-(_DESCS_COUNT(ct_bits) + 1))
/* The state variable for DESC0_ID() in the reusable state. */
#define DESC0_SV(ct_bits)	DESC_SV(DESC0_ID(ct_bits), desc_reusable)
238
/*
 * Define a ringbuffer with an external text data buffer. The same as
 * DEFINE_PRINTKRB() but requires specifying an external buffer for the
 * text data.
 *
 * @name:        The name of the ringbuffer variable.
 * @descbits:    The number of descriptors as a power-of-2 value.
 * @avgtextbits: The average text data size per record as a power-of-2 value.
 * @text_buf:    The external buffer used for the text data.
 *
 * Three static objects are defined: the descriptor array, the meta-data
 * array and the ringbuffer itself. The arrays are initialized as described
 * in "Descriptor Bootstrap" above.
 *
 * All atomic_long_t fields are initialized with ATOMIC_LONG_INIT() so that
 * the initializer always matches the field type.
 *
 * Note: The specified external buffer must be of the size:
 *       2 ^ (descbits + avgtextbits)
 */
#define _DEFINE_PRINTKRB(name, descbits, avgtextbits, text_buf)				\
static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = {				\
	/* the initial head and tail */								\
	[_DESCS_COUNT(descbits) - 1] = {							\
		/* reusable */									\
		.state_var	= ATOMIC_LONG_INIT(DESC0_SV(descbits)),				\
		/* no associated data block */							\
		.text_blk_lpos	= FAILED_BLK_LPOS,						\
	},											\
};												\
static struct printk_info _##name##_infos[_DESCS_COUNT(descbits)] = {				\
	/* this will be the first record reserved by a writer */				\
	[0] = {											\
		/* will be incremented to 0 on the first reservation */				\
		.seq = -(u64)_DESCS_COUNT(descbits),						\
	},											\
	/* the initial head and tail */								\
	[_DESCS_COUNT(descbits) - 1] = {							\
		/* reports the first seq value during the bootstrap phase */			\
		.seq = 0,									\
	},											\
};												\
static struct printk_ringbuffer name = {							\
	.desc_ring = {										\
		.count_bits	= (descbits),							\
		.descs		= &_##name##_descs[0],						\
		.infos		= &_##name##_infos[0],						\
		.head_id	= ATOMIC_LONG_INIT(DESC0_ID(descbits)),				\
		.tail_id	= ATOMIC_LONG_INIT(DESC0_ID(descbits)),				\
		.last_finalized_seq = ATOMIC_LONG_INIT(0),					\
	},											\
	.text_data_ring = {									\
		.size_bits	= (avgtextbits) + (descbits),					\
		.data		= (text_buf),							\
		.head_lpos	= ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))),	\
		.tail_lpos	= ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))),	\
	},											\
	.fail			= ATOMIC_LONG_INIT(0),						\
}
286
/**
 * DEFINE_PRINTKRB() - Define a ringbuffer.
 *
 * @name:        The name of the ringbuffer variable.
 * @descbits:    The number of descriptors as a power-of-2 value.
 * @avgtextbits: The average text data size per record as a power-of-2 value.
 *
 * This is a macro for defining a ringbuffer and all internal structures
 * such that it is ready for immediate use. See _DEFINE_PRINTKRB() for a
 * variant where the text data buffer can be specified externally.
 *
 * The text buffer is a static char array of 2 ^ (descbits + avgtextbits)
 * bytes, aligned for unsigned long. Its size is computed with _DATA_SIZE(),
 * the same helper that defines the size of a data ring from its size bits.
 */
#define DEFINE_PRINTKRB(name, descbits, avgtextbits)				\
static char _##name##_text[_DATA_SIZE((avgtextbits) + (descbits))]		\
			__aligned(__alignof__(unsigned long));			\
_DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0])
302
303/* Writer Interface */
304
305/**
306 * prb_rec_init_wr() - Initialize a buffer for writing records.
307 *
308 * @r:             The record to initialize.
309 * @text_buf_size: The needed text buffer size.
310 */
311static inline void prb_rec_init_wr(struct printk_record *r,
312				   unsigned int text_buf_size)
313{
314	r->info = NULL;
315	r->text_buf = NULL;
316	r->text_buf_size = text_buf_size;
317}
318
/*
 * Reserve/commit operations. A writer prepares @r with prb_rec_init_wr()
 * before calling prb_reserve(). On success, @r->info and @r->text_buf point
 * to the buffers reserved for that writer. @e is the opaque reserve/commit
 * handle (see struct prb_reserved_entry).
 *
 * NOTE(review): the exact semantics of prb_reserve_in_last() and the
 * difference between prb_commit() and prb_final_commit() are not visible
 * in this header - see the implementation.
 */
bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
		 struct printk_record *r);
bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
			 struct printk_record *r, u32 caller_id, unsigned int max_size);
void prb_commit(struct prb_reserved_entry *e);
void prb_final_commit(struct prb_reserved_entry *e);

/*
 * NOTE(review): presumably the runtime counterpart of DEFINE_PRINTKRB(),
 * setting up @rb from caller-provided buffers - confirm.
 */
void prb_init(struct printk_ringbuffer *rb,
	      char *text_buf, unsigned int text_buf_size,
	      struct prb_desc *descs, unsigned int descs_count_bits,
	      struct printk_info *infos);
/* NOTE(review): presumably reports @e->text_space - confirm. */
unsigned int prb_record_text_space(struct prb_reserved_entry *e);
331
332/* Reader Interface */
333
334/**
335 * prb_rec_init_rd() - Initialize a buffer for reading records.
336 *
337 * @r:             The record to initialize.
338 * @info:          A buffer to store record meta-data.
339 * @text_buf:      A buffer to store text data.
340 * @text_buf_size: The size of @text_buf.
341 *
342 * Initialize all the fields that a reader is interested in. All arguments
343 * (except @r) are optional. Only record data for arguments that are
344 * non-NULL or non-zero will be read.
345 */
346static inline void prb_rec_init_rd(struct printk_record *r,
347				   struct printk_info *info,
348				   char *text_buf, unsigned int text_buf_size)
349{
350	r->info = info;
351	r->text_buf = text_buf;
352	r->text_buf_size = text_buf_size;
353}
354
/**
 * prb_for_each_record() - Walk the records of a ringbuffer.
 *
 * @from: The sequence number to start at.
 * @rb:   The ringbuffer to walk.
 * @s:    A u64 that receives the sequence number used for each lookup.
 * @r:    A pointer to the printk_record that receives each record.
 *
 * Expands to a for-loop header. The loop runs for as long as
 * prb_read_valid() succeeds. After each iteration, @s is advanced to the
 * sequence number following that of the record just read.
 *
 * Note that @s may not be the sequence number of the record on each
 * iteration. For the sequence number, @r->info->seq should be checked.
 *
 * Context: Any context.
 */
#define prb_for_each_record(from, rb, s, r)		\
	for ((s) = (from);				\
	     prb_read_valid(rb, s, r);			\
	     (s) = (r)->info->seq + 1)
371
/**
 * prb_for_each_info() - Iterate over the meta data of a ringbuffer.
 *
 * @from: The sequence number to begin with.
 * @rb:   The ringbuffer to iterate over.
 * @s:    A u64 to store the sequence number on each iteration.
 * @i:    A pointer to a printk_info to store the record meta data on each
 *        iteration.
 * @lc:   A pointer to an unsigned int to store the text line count of each
 *        record.
 *
 * This is a macro for conveniently iterating over a ringbuffer.
 * Note that @s may not be the sequence number of the record on each
 * iteration. For the sequence number, @i->seq should be checked.
 *
 * Context: Any context.
 */
#define prb_for_each_info(from, rb, s, i, lc) \
for ((s) = (from); prb_read_valid_info(rb, s, i, lc); (s) = (i)->seq + 1)
389
/*
 * Read the record for @seq. The reader prepares @r with prb_rec_init_rd().
 * On success, the buffers provided through @r are filled with the record
 * meta-data and text data.
 */
bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
		    struct printk_record *r);
/*
 * Meta-data-only variant, used by prb_for_each_info(). @line_count
 * receives the text line count of the record.
 */
bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
			 struct printk_info *info, unsigned int *line_count);

/* Retrieve the sequence number of the tail descriptor. */
u64 prb_first_seq(struct printk_ringbuffer *rb);
/*
 * NOTE(review): the exact meaning of the three queries below is not
 * visible in this header - see the implementation.
 */
u64 prb_first_valid_seq(struct printk_ringbuffer *rb);
u64 prb_next_seq(struct printk_ringbuffer *rb);
u64 prb_next_reserve_seq(struct printk_ringbuffer *rb);
399
400#ifdef CONFIG_64BIT
401
/*
 * With CONFIG_64BIT, both conversions pass the value through unchanged.
 * @rb is ignored.
 */
#define __u64seq_to_ulseq(u64seq) (u64seq)
#define __ulseq_to_u64seq(rb, ulseq) (ulseq)
404
405#else /* CONFIG_64BIT */
406
/*
 * Keep only the lower 32 bits of a 64-bit sequence number. The argument is
 * parenthesized so that the cast applies to the whole expression.
 */
#define __u64seq_to_ulseq(u64seq) ((u32)(u64seq))
408
409static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq)
410{
411	u64 rb_first_seq = prb_first_seq(rb);
412	u64 seq;
413
414	/*
415	 * The provided sequence is only the lower 32 bits of the ringbuffer
416	 * sequence. It needs to be expanded to 64bit. Get the first sequence
417	 * number from the ringbuffer and fold it.
418	 *
419	 * Having a 32bit representation in the console is sufficient.
420	 * If a console ever gets more than 2^31 records behind
421	 * the ringbuffer then this is the least of the problems.
422	 *
423	 * Also the access to the ring buffer is always safe.
424	 */
425	seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq);
426
427	return seq;
428}
429
430#endif /* CONFIG_64BIT */
431
432#endif /* _KERNEL_PRINTK_RINGBUFFER_H */
433