1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright(C) 2015-2018 Linaro Limited.
4 *
5 * Author: Tor Jeremiassen <tor@ti.com>
6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7 */
8
9#include <linux/kernel.h>
10#include <linux/bitfield.h>
11#include <linux/bitops.h>
12#include <linux/coresight-pmu.h>
13#include <linux/err.h>
14#include <linux/log2.h>
15#include <linux/types.h>
16#include <linux/zalloc.h>
17
18#include <stdlib.h>
19
20#include "auxtrace.h"
21#include "color.h"
22#include "cs-etm.h"
23#include "cs-etm-decoder/cs-etm-decoder.h"
24#include "debug.h"
25#include "dso.h"
26#include "evlist.h"
27#include "intlist.h"
28#include "machine.h"
29#include "map.h"
30#include "perf.h"
31#include "session.h"
32#include "map_symbol.h"
33#include "branch.h"
34#include "symbol.h"
35#include "tool.h"
36#include "thread.h"
37#include "thread-stack.h"
38#include "tsc.h"
39#include <tools/libc_compat.h>
40#include "util/synthetic-events.h"
41#include "util/util.h"
42
43struct cs_etm_auxtrace {
44	struct auxtrace auxtrace;
45	struct auxtrace_queues queues;
46	struct auxtrace_heap heap;
47	struct itrace_synth_opts synth_opts;
48	struct perf_session *session;
49	struct perf_tsc_conversion tc;
50
	/*
	 * Timeless decoding has no timestamps in the trace, so overlapping
	 * mmap lookups are less accurate but the trace data is smaller. We use
	 * context IDs in the trace instead of matching timestamps with fork
	 * records, so timestamps aren't really needed in the general case.
	 * Overlapping mmaps happen in cases like between a fork and an exec.
	 */
58	bool timeless_decoding;
59
60	/*
61	 * Per-thread ignores the trace channel ID and instead assumes that
62	 * everything in a buffer comes from the same process regardless of
63	 * which CPU it ran on. It also implies no context IDs so the TID is
64	 * taken from the auxtrace buffer.
65	 */
66	bool per_thread_decoding;
67	bool snapshot_mode;
68	bool data_queued;
69	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
70
71	int num_cpu;
72	u64 latest_kernel_timestamp;
73	u32 auxtrace_type;
74	u64 branches_sample_type;
75	u64 branches_id;
76	u64 instructions_sample_type;
77	u64 instructions_sample_period;
78	u64 instructions_id;
79	u64 **metadata;
80	unsigned int pmu_type;
81	enum cs_etm_pid_fmt pid_fmt;
82};
83
84struct cs_etm_traceid_queue {
85	u8 trace_chan_id;
86	u64 period_instructions;
87	size_t last_branch_pos;
88	union perf_event *event_buf;
89	struct thread *thread;
90	struct thread *prev_packet_thread;
91	ocsd_ex_level prev_packet_el;
92	ocsd_ex_level el;
93	struct branch_stack *last_branch;
94	struct branch_stack *last_branch_rb;
95	struct cs_etm_packet *prev_packet;
96	struct cs_etm_packet *packet;
97	struct cs_etm_packet_queue packet_queue;
98};
99
100struct cs_etm_queue {
101	struct cs_etm_auxtrace *etm;
102	struct cs_etm_decoder *decoder;
103	struct auxtrace_buffer *buffer;
104	unsigned int queue_nr;
105	u8 pending_timestamp_chan_id;
106	u64 offset;
107	const unsigned char *buf;
108	size_t buf_len, buf_used;
109	/* Conversion between traceID and index in traceid_queues array */
110	struct intlist *traceid_queues_list;
111	struct cs_etm_traceid_queue **traceid_queues;
112};
113
114/* RB tree for quick conversion between traceID and metadata pointers */
115static struct intlist *traceid_list;
116
117static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
118static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
119					   pid_t tid);
120static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
121static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
122
123/* PTMs ETMIDR [11:8] set to b0011 */
124#define ETMIDR_PTM_VERSION 0x00000300
125
126/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with.  One option is to modify the auxtrace_heap_XYZ() API or simply
 * encode the etm queue number in the upper 16 bits and the channel in
 * the lower 16 bits.
131 */
132#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
133		      (queue_nr << 16 | trace_chan_id)
134#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
135#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
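/*
 * For example, queue_nr 2 and trace_chan_id 0x10 are packed as
 * TO_CS_QUEUE_NR(2, 0x10) == 0x20010; TO_QUEUE_NR() and TO_TRACE_CHAN_ID()
 * recover 2 and 0x10 from that value.
 */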
136
137static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
138{
139	etmidr &= ETMIDR_PTM_VERSION;
140
141	if (etmidr == ETMIDR_PTM_VERSION)
142		return CS_ETM_PROTO_PTM;
143
144	return CS_ETM_PROTO_ETMV3;
145}
146
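/* Look up the metadata for @trace_chan_id and return its CS_ETM_MAGIC value. */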
147static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
148{
149	struct int_node *inode;
150	u64 *metadata;
151
152	inode = intlist__find(traceid_list, trace_chan_id);
153	if (!inode)
154		return -EINVAL;
155
156	metadata = inode->priv;
157	*magic = metadata[CS_ETM_MAGIC];
158	return 0;
159}
160
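/* Look up the metadata for @trace_chan_id and return the CPU it belongs to. */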
161int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
162{
163	struct int_node *inode;
164	u64 *metadata;
165
166	inode = intlist__find(traceid_list, trace_chan_id);
167	if (!inode)
168		return -EINVAL;
169
170	metadata = inode->priv;
171	*cpu = (int)metadata[CS_ETM_CPU];
172	return 0;
173}
174
175/*
176 * The returned PID format is presented as an enum:
177 *
178 *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
179 *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
180 *   CS_ETM_PIDFMT_NONE: No context IDs
181 *
182 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
183 * are enabled at the same time when the session runs on an EL2 kernel.
 * This means both CONTEXTIDR_EL1 and CONTEXTIDR_EL2 will be recorded in
 * the trace data; in that case the tool uses CONTEXTIDR_EL2 as the PID.
187 *
188 * The result is cached in etm->pid_fmt so this function only needs to be called
189 * when processing the aux info.
190 */
191static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
192{
193	u64 val;
194
195	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
196		val = metadata[CS_ETM_ETMCR];
197		/* CONTEXTIDR is traced */
198		if (val & BIT(ETM_OPT_CTXTID))
199			return CS_ETM_PIDFMT_CTXTID;
200	} else {
201		val = metadata[CS_ETMV4_TRCCONFIGR];
202		/* CONTEXTIDR_EL2 is traced */
203		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
204			return CS_ETM_PIDFMT_CTXTID2;
205		/* CONTEXTIDR_EL1 is traced */
206		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
207			return CS_ETM_PIDFMT_CTXTID;
208	}
209
210	return CS_ETM_PIDFMT_NONE;
211}
212
213enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
214{
215	return etmq->etm->pid_fmt;
216}
217
218static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
219{
220	struct int_node *inode;
221
	/* Get an RB node for this trace ID */
223	inode = intlist__findnew(traceid_list, trace_chan_id);
224
225	/* Something went wrong, no need to continue */
226	if (!inode)
227		return -ENOMEM;
228
229	/*
	 * The node for that trace ID should not already be in use.
	 * Back out if it is.
232	 */
233	if (inode->priv)
234		return -EINVAL;
235
236	/* All good, associate the traceID with the metadata pointer */
237	inode->priv = cpu_metadata;
238
239	return 0;
240}
241
242static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
243{
244	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
245
246	switch (cs_etm_magic) {
247	case __perf_cs_etmv3_magic:
248		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
249				      CORESIGHT_TRACE_ID_VAL_MASK);
250		break;
251	case __perf_cs_etmv4_magic:
252	case __perf_cs_ete_magic:
253		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
254				      CORESIGHT_TRACE_ID_VAL_MASK);
255		break;
256	default:
257		return -EINVAL;
258	}
259	return 0;
260}
261
262/*
 * Update the metadata trace ID from the value found in the AUX_OUTPUT_HW_ID
 * packet.  This also clears the CORESIGHT_TRACE_ID_UNUSED_FLAG if present.
265 */
266static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
267{
268	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
269
270	switch (cs_etm_magic) {
271	case __perf_cs_etmv3_magic:
		cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
273		break;
274	case __perf_cs_etmv4_magic:
275	case __perf_cs_ete_magic:
276		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
277		break;
278
279	default:
280		return -EINVAL;
281	}
282	return 0;
283}
284
/*
 * Get the index into the metadata array for a specific CPU.
 */
289static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
290{
291	int i;
292
293	for (i = 0; i < etm->num_cpu; i++) {
		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu)
			return i;
297	}
298
299	return -1;
300}
301
/*
 * Get the metadata for a specific CPU.
 */
306static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
307{
308	int idx = get_cpu_data_idx(etm, cpu);
309
310	return (idx != -1) ? etm->metadata[idx] : NULL;
311}
312
313/*
314 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
315 *
316 * The payload associates the Trace ID and the CPU.
317 * The routine is tolerant of seeing multiple packets with the same association,
318 * but a CPU / Trace ID association changing during a session is an error.
319 */
320static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
321					    union perf_event *event)
322{
323	struct cs_etm_auxtrace *etm;
324	struct perf_sample sample;
325	struct int_node *inode;
326	struct evsel *evsel;
327	u64 *cpu_data;
328	u64 hw_id;
329	int cpu, version, err;
330	u8 trace_chan_id, curr_chan_id;
331
332	/* extract and parse the HW ID */
333	hw_id = event->aux_output_hw_id.hw_id;
334	version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
335	trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
336
337	/* check that we can handle this version */
338	if (version > CS_AUX_HW_ID_CURR_VERSION)
339		return -EINVAL;
340
341	/* get access to the etm metadata */
342	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
343	if (!etm || !etm->metadata)
344		return -EINVAL;
345
346	/* parse the sample to get the CPU */
347	evsel = evlist__event2evsel(session->evlist, event);
348	if (!evsel)
349		return -EINVAL;
350	err = evsel__parse_sample(evsel, event, &sample);
351	if (err)
352		return err;
353	cpu = sample.cpu;
354	if (cpu == -1) {
355		/* no CPU in the sample - possibly recorded with an old version of perf */
		pr_err("CS_ETM: no CPU in AUX_OUTPUT_HW_ID sample. Use a compatible perf to record.\n");
357		return -EINVAL;
358	}
359
360	/* See if the ID is mapped to a CPU, and it matches the current CPU */
361	inode = intlist__find(traceid_list, trace_chan_id);
362	if (inode) {
363		cpu_data = inode->priv;
364		if ((int)cpu_data[CS_ETM_CPU] != cpu) {
365			pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
366			return -EINVAL;
367		}
368
369		/* check that the mapped ID matches */
370		err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
371		if (err)
372			return err;
373		if (curr_chan_id != trace_chan_id) {
374			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
375			return -EINVAL;
376		}
377
378		/* mapped and matched - return OK */
379		return 0;
380	}
381
382	cpu_data = get_cpu_data(etm, cpu);
383	if (cpu_data == NULL)
		return -EINVAL;
385
	/* not one we've seen before - let's map it */
387	err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
388	if (err)
389		return err;
390
391	/*
	 * Since we are picking up the association from the packet, we need to
	 * plug the correct trace ID into the metadata for setting up decoders later.
394	 */
395	err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
396	return err;
397}
398
399void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
400					      u8 trace_chan_id)
401{
402	/*
403	 * When a timestamp packet is encountered the backend code
404	 * is stopped so that the front end has time to process packets
405	 * that were accumulated in the traceID queue.  Since there can
406	 * be more than one channel per cs_etm_queue, we need to specify
407	 * what traceID queue needs servicing.
408	 */
409	etmq->pending_timestamp_chan_id = trace_chan_id;
410}
411
412static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
413				      u8 *trace_chan_id)
414{
415	struct cs_etm_packet_queue *packet_queue;
416
417	if (!etmq->pending_timestamp_chan_id)
418		return 0;
419
420	if (trace_chan_id)
421		*trace_chan_id = etmq->pending_timestamp_chan_id;
422
423	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
424						     etmq->pending_timestamp_chan_id);
425	if (!packet_queue)
426		return 0;
427
428	/* Acknowledge pending status */
429	etmq->pending_timestamp_chan_id = 0;
430
431	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
432	return packet_queue->cs_timestamp;
433}
434
435static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
436{
437	int i;
438
439	queue->head = 0;
440	queue->tail = 0;
441	queue->packet_count = 0;
442	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
443		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
444		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
445		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
446		queue->packet_buffer[i].instr_count = 0;
447		queue->packet_buffer[i].last_instr_taken_branch = false;
448		queue->packet_buffer[i].last_instr_size = 0;
449		queue->packet_buffer[i].last_instr_type = 0;
450		queue->packet_buffer[i].last_instr_subtype = 0;
451		queue->packet_buffer[i].last_instr_cond = 0;
452		queue->packet_buffer[i].flags = 0;
453		queue->packet_buffer[i].exception_number = UINT32_MAX;
454		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
455		queue->packet_buffer[i].cpu = INT_MIN;
456	}
457}
458
459static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
460{
461	int idx;
462	struct int_node *inode;
463	struct cs_etm_traceid_queue *tidq;
464	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
465
466	intlist__for_each_entry(inode, traceid_queues_list) {
467		idx = (int)(intptr_t)inode->priv;
468		tidq = etmq->traceid_queues[idx];
469		cs_etm__clear_packet_queue(&tidq->packet_queue);
470	}
471}
472
473static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
474				      struct cs_etm_traceid_queue *tidq,
475				      u8 trace_chan_id)
476{
477	int rc = -ENOMEM;
478	struct auxtrace_queue *queue;
479	struct cs_etm_auxtrace *etm = etmq->etm;
480
481	cs_etm__clear_packet_queue(&tidq->packet_queue);
482
483	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
484	tidq->trace_chan_id = trace_chan_id;
485	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
486	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
487					       queue->tid);
488	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
489
490	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
491	if (!tidq->packet)
492		goto out;
493
494	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
495	if (!tidq->prev_packet)
496		goto out_free;
497
498	if (etm->synth_opts.last_branch) {
499		size_t sz = sizeof(struct branch_stack);
500
501		sz += etm->synth_opts.last_branch_sz *
502		      sizeof(struct branch_entry);
503		tidq->last_branch = zalloc(sz);
504		if (!tidq->last_branch)
505			goto out_free;
506		tidq->last_branch_rb = zalloc(sz);
507		if (!tidq->last_branch_rb)
508			goto out_free;
509	}
510
511	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
512	if (!tidq->event_buf)
513		goto out_free;
514
515	return 0;
516
517out_free:
518	zfree(&tidq->last_branch_rb);
519	zfree(&tidq->last_branch);
520	zfree(&tidq->prev_packet);
521	zfree(&tidq->packet);
522out:
523	return rc;
524}
525
526static struct cs_etm_traceid_queue
527*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
528{
529	int idx;
530	struct int_node *inode;
531	struct intlist *traceid_queues_list;
532	struct cs_etm_traceid_queue *tidq, **traceid_queues;
533	struct cs_etm_auxtrace *etm = etmq->etm;
534
535	if (etm->per_thread_decoding)
536		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
537
538	traceid_queues_list = etmq->traceid_queues_list;
539
540	/*
	 * Check if the traceid_queue exists for this traceID by looking
542	 * in the queue list.
543	 */
544	inode = intlist__find(traceid_queues_list, trace_chan_id);
545	if (inode) {
546		idx = (int)(intptr_t)inode->priv;
547		return etmq->traceid_queues[idx];
548	}
549
550	/* We couldn't find a traceid_queue for this traceID, allocate one */
551	tidq = malloc(sizeof(*tidq));
552	if (!tidq)
553		return NULL;
554
555	memset(tidq, 0, sizeof(*tidq));
556
557	/* Get a valid index for the new traceid_queue */
558	idx = intlist__nr_entries(traceid_queues_list);
	/* Memory for the inode is freed in cs_etm__free_traceid_queues() */
560	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
561	if (!inode)
562		goto out_free;
563
564	/* Associate this traceID with this index */
565	inode->priv = (void *)(intptr_t)idx;
566
567	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
568		goto out_free;
569
570	/* Grow the traceid_queues array by one unit */
571	traceid_queues = etmq->traceid_queues;
572	traceid_queues = reallocarray(traceid_queues,
573				      idx + 1,
574				      sizeof(*traceid_queues));
575
576	/*
577	 * On failure reallocarray() returns NULL and the original block of
578	 * memory is left untouched.
579	 */
580	if (!traceid_queues)
581		goto out_free;
582
583	traceid_queues[idx] = tidq;
584	etmq->traceid_queues = traceid_queues;
585
586	return etmq->traceid_queues[idx];
587
588out_free:
589	/*
590	 * Function intlist__remove() removes the inode from the list
	 * and deletes the memory associated with it.
592	 */
593	intlist__remove(traceid_queues_list, inode);
594	free(tidq);
595
596	return NULL;
597}
598
599struct cs_etm_packet_queue
600*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
601{
602	struct cs_etm_traceid_queue *tidq;
603
604	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
605	if (tidq)
606		return &tidq->packet_queue;
607
608	return NULL;
609}
610
611static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
612				struct cs_etm_traceid_queue *tidq)
613{
614	struct cs_etm_packet *tmp;
615
616	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
617	    etm->synth_opts.instructions) {
618		/*
619		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
620		 * the next incoming packet.
621		 *
622		 * Threads and exception levels are also tracked for both the
623		 * previous and current packets. This is because the previous
624		 * packet is used for the 'from' IP for branch samples, so the
625		 * thread at that time must also be assigned to that sample.
626		 * Across discontinuity packets the thread can change, so by
627		 * tracking the thread for the previous packet the branch sample
628		 * will have the correct info.
629		 */
630		tmp = tidq->packet;
631		tidq->packet = tidq->prev_packet;
632		tidq->prev_packet = tmp;
633		tidq->prev_packet_el = tidq->el;
634		thread__put(tidq->prev_packet_thread);
635		tidq->prev_packet_thread = thread__get(tidq->thread);
636	}
637}
638
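/* Print one decoded packet string in blue, ensuring it ends with a newline. */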
639static void cs_etm__packet_dump(const char *pkt_string)
640{
641	const char *color = PERF_COLOR_BLUE;
642	int len = strlen(pkt_string);
643
644	if (len && (pkt_string[len-1] == '\n'))
645		color_fprintf(stdout, color, "	%s", pkt_string);
646	else
647		color_fprintf(stdout, color, "	%s\n", pkt_string);
648
649	fflush(stdout);
650}
651
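/*
 * The following helpers fill the trace parameters of decoder t_idx from the
 * metadata of CPU m_idx, for the ETMv3/PTM, ETMv4 and ETE protocols.
 */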
652static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
653					  struct cs_etm_auxtrace *etm, int t_idx,
654					  int m_idx, u32 etmidr)
655{
656	u64 **metadata = etm->metadata;
657
658	t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
659	t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR];
660	t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR];
661}
662
663static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
664					  struct cs_etm_auxtrace *etm, int t_idx,
665					  int m_idx)
666{
667	u64 **metadata = etm->metadata;
668
669	t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
670	t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
671	t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
672	t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
673	t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
674	t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
675	t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
676}
677
678static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
679					  struct cs_etm_auxtrace *etm, int t_idx,
680					  int m_idx)
681{
682	u64 **metadata = etm->metadata;
683
684	t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
685	t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
686	t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
687	t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
688	t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
689	t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
690	t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
691	t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
692}
693
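/*
 * Fill one set of trace parameters per decoder.  Formatted (frame wrapped)
 * data can interleave trace from all CPUs, so each decoder uses the metadata
 * with the matching index; unformatted data comes from a single CPU, so its
 * metadata is looked up by sample_cpu.
 */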
694static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
695				     struct cs_etm_auxtrace *etm,
696				     bool formatted,
697				     int sample_cpu,
698				     int decoders)
699{
700	int t_idx, m_idx;
701	u32 etmidr;
702	u64 architecture;
703
704	for (t_idx = 0; t_idx < decoders; t_idx++) {
705		if (formatted)
706			m_idx = t_idx;
707		else {
708			m_idx = get_cpu_data_idx(etm, sample_cpu);
709			if (m_idx == -1) {
710				pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
711				m_idx = 0;
712			}
713		}
714
715		architecture = etm->metadata[m_idx][CS_ETM_MAGIC];
716
717		switch (architecture) {
718		case __perf_cs_etmv3_magic:
719			etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
720			cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
721			break;
722		case __perf_cs_etmv4_magic:
723			cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
724			break;
725		case __perf_cs_ete_magic:
726			cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
727			break;
728		default:
729			return -EINVAL;
730		}
731	}
732
733	return 0;
734}
735
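/*
 * Fill the common decoder parameters: packet printer callback, operation mode
 * (packet printing or full decode) and frame format settings.
 */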
736static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
737				       struct cs_etm_queue *etmq,
738				       enum cs_etm_decoder_operation mode,
739				       bool formatted)
740{
741	int ret = -EINVAL;
742
743	if (!(mode < CS_ETM_OPERATION_MAX))
744		goto out;
745
746	d_params->packet_printer = cs_etm__packet_dump;
747	d_params->operation = mode;
748	d_params->data = etmq;
749	d_params->formatted = formatted;
750	d_params->fsyncs = false;
751	d_params->hsyncs = false;
752	d_params->frame_aligned = true;
753
754	ret = 0;
755out:
756	return ret;
757}
758
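/*
 * Dump the raw trace of an auxtrace buffer by running the queue's decoder over
 * it, then reset the decoder so it can be reused for the next buffer.
 */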
759static void cs_etm__dump_event(struct cs_etm_queue *etmq,
760			       struct auxtrace_buffer *buffer)
761{
762	int ret;
763	const char *color = PERF_COLOR_BLUE;
764	size_t buffer_used = 0;
765
766	fprintf(stdout, "\n");
767	color_fprintf(stdout, color,
768		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
769		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
770
771	do {
772		size_t consumed;
773
774		ret = cs_etm_decoder__process_data_block(
775				etmq->decoder, buffer->offset,
776				&((u8 *)buffer->data)[buffer_used],
777				buffer->size - buffer_used, &consumed);
778		if (ret)
779			break;
780
781		buffer_used += consumed;
782	} while (buffer_used < buffer->size);
783
784	cs_etm_decoder__reset(etmq->decoder);
785}
786
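/*
 * Called when the session is flushed: process whatever is left in the queues,
 * unless we are only dumping the trace.
 */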
787static int cs_etm__flush_events(struct perf_session *session,
788				struct perf_tool *tool)
789{
790	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
791						   struct cs_etm_auxtrace,
792						   auxtrace);
793	if (dump_trace)
794		return 0;
795
796	if (!tool->ordered_events)
797		return -EINVAL;
798
799	if (etm->timeless_decoding) {
800		/*
801		 * Pass tid = -1 to process all queues. But likely they will have
802		 * already been processed on PERF_RECORD_EXIT anyway.
803		 */
804		return cs_etm__process_timeless_queues(etm, -1);
805	}
806
807	return cs_etm__process_timestamped_queues(etm);
808}
809
810static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
811{
812	int idx;
813	uintptr_t priv;
814	struct int_node *inode, *tmp;
815	struct cs_etm_traceid_queue *tidq;
816	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
817
818	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
819		priv = (uintptr_t)inode->priv;
820		idx = priv;
821
822		/* Free this traceid_queue from the array */
823		tidq = etmq->traceid_queues[idx];
824		thread__zput(tidq->thread);
825		thread__zput(tidq->prev_packet_thread);
826		zfree(&tidq->event_buf);
827		zfree(&tidq->last_branch);
828		zfree(&tidq->last_branch_rb);
829		zfree(&tidq->prev_packet);
830		zfree(&tidq->packet);
831		zfree(&tidq);
832
833		/*
834		 * Function intlist__remove() removes the inode from the list
		 * and deletes the memory associated with it.
836		 */
837		intlist__remove(traceid_queues_list, inode);
838	}
839
840	/* Then the RB tree itself */
841	intlist__delete(traceid_queues_list);
842	etmq->traceid_queues_list = NULL;
843
844	/* finally free the traceid_queues array */
845	zfree(&etmq->traceid_queues);
846}
847
848static void cs_etm__free_queue(void *priv)
849{
850	struct cs_etm_queue *etmq = priv;
851
852	if (!etmq)
853		return;
854
855	cs_etm_decoder__free(etmq->decoder);
856	cs_etm__free_traceid_queues(etmq);
857	free(etmq);
858}
859
860static void cs_etm__free_events(struct perf_session *session)
861{
862	unsigned int i;
863	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
864						   struct cs_etm_auxtrace,
865						   auxtrace);
866	struct auxtrace_queues *queues = &aux->queues;
867
868	for (i = 0; i < queues->nr_queues; i++) {
869		cs_etm__free_queue(queues->queue_array[i].priv);
870		queues->queue_array[i].priv = NULL;
871	}
872
873	auxtrace_queues__free(queues);
874}
875
876static void cs_etm__free(struct perf_session *session)
877{
878	int i;
879	struct int_node *inode, *tmp;
880	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
881						   struct cs_etm_auxtrace,
882						   auxtrace);
883	cs_etm__free_events(session);
884	session->auxtrace = NULL;
885
886	/* First remove all traceID/metadata nodes for the RB tree */
887	intlist__for_each_entry_safe(inode, tmp, traceid_list)
888		intlist__remove(traceid_list, inode);
889	/* Then the RB tree itself */
890	intlist__delete(traceid_list);
891
892	for (i = 0; i < aux->num_cpu; i++)
893		zfree(&aux->metadata[i]);
894
895	zfree(&aux->metadata);
896	zfree(&aux);
897}
898
899static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
900				      struct evsel *evsel)
901{
902	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
903						   struct cs_etm_auxtrace,
904						   auxtrace);
905
906	return evsel->core.attr.type == aux->pmu_type;
907}
908
909static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
910					   ocsd_ex_level el)
911{
912	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
913
914	/*
915	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
	 * running at EL1, assume everything is the host.
917	 */
918	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
919		return &etmq->etm->session->machines.host;
920
921	/*
922	 * Not perfect, but otherwise assume anything in EL1 is the default
923	 * guest, and everything else is the host. Distinguishing between guest
924	 * and host userspaces isn't currently supported either. Neither is
	 * multiple guest support. All this does is reduce the likelihood of
926	 * decode errors where we look into the host kernel maps when it should
927	 * have been the guest maps.
928	 */
929	switch (el) {
930	case ocsd_EL1:
931		return machines__find_guest(&etmq->etm->session->machines,
932					    DEFAULT_GUEST_KERNEL_ID);
933	case ocsd_EL3:
934	case ocsd_EL2:
935	case ocsd_EL0:
936	case ocsd_EL_unknown:
937	default:
938		return &etmq->etm->session->machines.host;
939	}
940}
941
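/*
 * Derive the perf cpumode (kernel/user, host/guest) for an address from the
 * machine selected for the given exception level.
 */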
942static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
943			   ocsd_ex_level el)
944{
945	struct machine *machine = cs_etm__get_machine(etmq, el);
946
947	if (address >= machine__kernel_start(machine)) {
948		if (machine__is_host(machine))
949			return PERF_RECORD_MISC_KERNEL;
950		else
951			return PERF_RECORD_MISC_GUEST_KERNEL;
952	} else {
953		if (machine__is_host(machine))
954			return PERF_RECORD_MISC_USER;
955		else {
956			/*
957			 * Can't really happen at the moment because
958			 * cs_etm__get_machine() will always return
			 * machines.host for any non-EL1 trace.
960			 */
961			return PERF_RECORD_MISC_GUEST_USER;
962		}
963	}
964}
965
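/*
 * Memory access callback handed to the decoder library (see
 * cs_etm_decoder__add_mem_access_cb() below): read up to @size bytes of
 * instruction memory at @address for the thread currently associated with
 * @trace_chan_id.  Returns the number of bytes read, or 0 on failure so that
 * decoding can carry on.
 */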
966static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
967			      u64 address, size_t size, u8 *buffer,
968			      const ocsd_mem_space_acc_t mem_space)
969{
970	u8  cpumode;
971	u64 offset;
972	int len;
973	struct addr_location al;
974	struct dso *dso;
975	struct cs_etm_traceid_queue *tidq;
976	int ret = 0;
977
978	if (!etmq)
979		return 0;
980
981	addr_location__init(&al);
982	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
983	if (!tidq)
984		goto out;
985
986	/*
	 * We've already tracked the EL alongside the PID in cs_etm__set_thread(),
	 * so double-check that it matches what OpenCSD thinks as well. It
989	 * doesn't distinguish between EL0 and EL1 for this mem access callback
990	 * so we had to do the extra tracking. Skip validation if it's any of
991	 * the 'any' values.
992	 */
993	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
994	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
995		if (mem_space & OCSD_MEM_SPACE_EL1N) {
996			/* Includes both non secure EL1 and EL0 */
997			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
998		} else if (mem_space & OCSD_MEM_SPACE_EL2)
999			assert(tidq->el == ocsd_EL2);
1000		else if (mem_space & OCSD_MEM_SPACE_EL3)
1001			assert(tidq->el == ocsd_EL3);
1002	}
1003
1004	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
1005
1006	if (!thread__find_map(tidq->thread, cpumode, address, &al))
1007		goto out;
1008
1009	dso = map__dso(al.map);
1010	if (!dso)
1011		goto out;
1012
1013	if (dso->data.status == DSO_DATA_STATUS_ERROR &&
1014	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
1015		goto out;
1016
1017	offset = map__map_ip(al.map, address);
1018
1019	map__load(al.map);
1020
1021	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
1022				    offset, buffer, size);
1023
1024	if (len <= 0) {
1025		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1026				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1027		if (!dso->auxtrace_warned) {
1028			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1029				    address,
1030				    dso->long_name ? dso->long_name : "Unknown");
1031			dso->auxtrace_warned = true;
1032		}
1033		goto out;
1034	}
1035	ret = len;
1036out:
1037	addr_location__exit(&al);
1038	return ret;
1039}
1040
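/*
 * Allocate a queue, its traceid_queues list and the decoder(s) that will
 * process its trace data.
 */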
1041static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
1042						bool formatted, int sample_cpu)
1043{
1044	struct cs_etm_decoder_params d_params;
1045	struct cs_etm_trace_params  *t_params = NULL;
1046	struct cs_etm_queue *etmq;
1047	/*
1048	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
1049	 * needed.
1050	 */
1051	int decoders = formatted ? etm->num_cpu : 1;
1052
1053	etmq = zalloc(sizeof(*etmq));
1054	if (!etmq)
1055		return NULL;
1056
1057	etmq->traceid_queues_list = intlist__new(NULL);
1058	if (!etmq->traceid_queues_list)
1059		goto out_free;
1060
1061	/* Use metadata to fill in trace parameters for trace decoder */
1062	t_params = zalloc(sizeof(*t_params) * decoders);
1063
1064	if (!t_params)
1065		goto out_free;
1066
1067	if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders))
1068		goto out_free;
1069
1070	/* Set decoder parameters to decode trace packets */
1071	if (cs_etm__init_decoder_params(&d_params, etmq,
1072					dump_trace ? CS_ETM_OPERATION_PRINT :
1073						     CS_ETM_OPERATION_DECODE,
1074					formatted))
1075		goto out_free;
1076
1077	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
1078					    t_params);
1079
1080	if (!etmq->decoder)
1081		goto out_free;
1082
1083	/*
1084	 * Register a function to handle all memory accesses required by
1085	 * the trace decoder library.
1086	 */
1087	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
1088					      0x0L, ((u64) -1L),
1089					      cs_etm__mem_access))
1090		goto out_free_decoder;
1091
1092	zfree(&t_params);
1093	return etmq;
1094
1095out_free_decoder:
1096	cs_etm_decoder__free(etmq->decoder);
1097out_free:
1098	intlist__delete(etmq->traceid_queues_list);
1099	free(etmq);
1100
1101	return NULL;
1102}
1103
1104static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1105			       struct auxtrace_queue *queue,
1106			       unsigned int queue_nr,
1107			       bool formatted,
1108			       int sample_cpu)
1109{
1110	struct cs_etm_queue *etmq = queue->priv;
1111
1112	if (list_empty(&queue->head) || etmq)
1113		return 0;
1114
1115	etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu);
1116
1117	if (!etmq)
1118		return -ENOMEM;
1119
1120	queue->priv = etmq;
1121	etmq->etm = etm;
1122	etmq->queue_nr = queue_nr;
1123	etmq->offset = 0;
1124
1125	return 0;
1126}
1127
1128static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1129					    struct cs_etm_queue *etmq,
1130					    unsigned int queue_nr)
1131{
1132	int ret = 0;
1133	unsigned int cs_queue_nr;
1134	u8 trace_chan_id;
1135	u64 cs_timestamp;
1136
1137	/*
1138	 * We are under a CPU-wide trace scenario.  As such we need to know
1139	 * when the code that generated the traces started to execute so that
1140	 * it can be correlated with execution on other CPUs.  So we get a
1141	 * handle on the beginning of traces and decode until we find a
1142	 * timestamp.  The timestamp is then added to the auxtrace min heap
	 * in order to know which of the etmqs to decode first.
1144	 */
1145	while (1) {
1146		/*
1147		 * Fetch an aux_buffer from this etmq.  Bail if no more
1148		 * blocks or an error has been encountered.
1149		 */
1150		ret = cs_etm__get_data_block(etmq);
1151		if (ret <= 0)
1152			goto out;
1153
1154		/*
1155		 * Run decoder on the trace block.  The decoder will stop when
1156		 * encountering a CS timestamp, a full packet queue or the end of
1157		 * trace for that block.
1158		 */
1159		ret = cs_etm__decode_data_block(etmq);
1160		if (ret)
1161			goto out;
1162
1163		/*
1164		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1165		 * the timestamp calculation for us.
1166		 */
1167		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
1168
1169		/* We found a timestamp, no need to continue. */
1170		if (cs_timestamp)
1171			break;
1172
1173		/*
1174		 * We didn't find a timestamp so empty all the traceid packet
1175		 * queues before looking for another timestamp packet, either
1176		 * in the current data block or a new one.  Packets that were
1177		 * just decoded are useless since no timestamp has been
1178		 * associated with them.  As such simply discard them.
1179		 */
1180		cs_etm__clear_all_packet_queues(etmq);
1181	}
1182
1183	/*
1184	 * We have a timestamp.  Add it to the min heap to reflect when
1185	 * instructions conveyed by the range packets of this traceID queue
1186	 * started to execute.  Once the same has been done for all the traceID
	 * queues of each etmq, rendering and decoding can start in
1188	 * chronological order.
1189	 *
1190	 * Note that packets decoded above are still in the traceID's packet
1191	 * queue and will be processed in cs_etm__process_timestamped_queues().
1192	 */
1193	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1194	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
1195out:
1196	return ret;
1197}
1198
1199static inline
1200void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1201				 struct cs_etm_traceid_queue *tidq)
1202{
1203	struct branch_stack *bs_src = tidq->last_branch_rb;
1204	struct branch_stack *bs_dst = tidq->last_branch;
1205	size_t nr = 0;
1206
1207	/*
1208	 * Set the number of records before early exit: ->nr is used to
1209	 * determine how many branches to copy from ->entries.
1210	 */
1211	bs_dst->nr = bs_src->nr;
1212
1213	/*
1214	 * Early exit when there is nothing to copy.
1215	 */
1216	if (!bs_src->nr)
1217		return;
1218
1219	/*
1220	 * As bs_src->entries is a circular buffer, we need to copy from it in
1221	 * two steps.  First, copy the branches from the most recently inserted
1222	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
1223	 */
1224	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1225	memcpy(&bs_dst->entries[0],
1226	       &bs_src->entries[tidq->last_branch_pos],
1227	       sizeof(struct branch_entry) * nr);
1228
1229	/*
1230	 * If we wrapped around at least once, the branches from the beginning
1231	 * of the bs_src->entries buffer and until the ->last_branch_pos element
1232	 * are older valid branches: copy them over.  The total number of
1233	 * branches copied over will be equal to the number of branches asked by
1234	 * the user in last_branch_sz.
1235	 */
1236	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1237		memcpy(&bs_dst->entries[nr],
1238		       &bs_src->entries[0],
1239		       sizeof(struct branch_entry) * tidq->last_branch_pos);
1240	}
1241}
1242
1243static inline
1244void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1245{
1246	tidq->last_branch_pos = 0;
1247	tidq->last_branch_rb->nr = 0;
1248}
1249
1250static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1251					 u8 trace_chan_id, u64 addr)
1252{
1253	u8 instrBytes[2];
1254
1255	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1256			   instrBytes, 0);
1257	/*
1258	 * T32 instruction size is indicated by bits[15:11] of the first
1259	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1260	 * denote a 32-bit instruction.
1261	 */
1262	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1263}
1264
1265static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1266{
1267	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1268	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1269		return 0;
1270
1271	return packet->start_addr;
1272}
1273
1274static inline
1275u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1276{
1277	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1278	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1279		return 0;
1280
1281	return packet->end_addr - packet->last_instr_size;
1282}
1283
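/*
 * Return the address of the instruction that is @offset instructions into a
 * range packet.  T32 code must be walked instruction by instruction because
 * instructions can be 2 or 4 bytes long; A32/A64 instructions are always
 * 4 bytes.
 */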
1284static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1285				     u64 trace_chan_id,
1286				     const struct cs_etm_packet *packet,
1287				     u64 offset)
1288{
1289	if (packet->isa == CS_ETM_ISA_T32) {
1290		u64 addr = packet->start_addr;
1291
1292		while (offset) {
1293			addr += cs_etm__t32_instr_size(etmq,
1294						       trace_chan_id, addr);
1295			offset--;
1296		}
1297		return addr;
1298	}
1299
1300	/* Assume a 4 byte instruction size (A32/A64) */
1301	return packet->start_addr + offset * 4;
1302}
1303
1304static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1305					  struct cs_etm_traceid_queue *tidq)
1306{
1307	struct branch_stack *bs = tidq->last_branch_rb;
1308	struct branch_entry *be;
1309
1310	/*
1311	 * The branches are recorded in a circular buffer in reverse
1312	 * chronological order: we start recording from the last element of the
1313	 * buffer down.  After writing the first element of the stack, move the
1314	 * insert position back to the end of the buffer.
1315	 */
1316	if (!tidq->last_branch_pos)
1317		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1318
1319	tidq->last_branch_pos -= 1;
1320
1321	be       = &bs->entries[tidq->last_branch_pos];
1322	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1323	be->to	 = cs_etm__first_executed_instr(tidq->packet);
1324	/* No support for mispredict */
1325	be->flags.mispred = 0;
1326	be->flags.predicted = 1;
1327
1328	/*
1329	 * Increment bs->nr until reaching the number of last branches asked by
1330	 * the user on the command line.
1331	 */
1332	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1333		bs->nr += 1;
1334}
1335
1336static int cs_etm__inject_event(union perf_event *event,
1337			       struct perf_sample *sample, u64 type)
1338{
1339	event->header.size = perf_event__sample_event_size(sample, type, 0);
1340	return perf_event__synthesize_sample(event, type, 0, sample);
1341}
1342
1343
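/*
 * Move this queue to its next auxtrace buffer, mapping the data from the perf
 * data file if needed and dropping the previous buffer.  Returns the new
 * buffer length, 0 when there is no more trace data, or a negative error code.
 */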
1344static int
1345cs_etm__get_trace(struct cs_etm_queue *etmq)
1346{
1347	struct auxtrace_buffer *aux_buffer = etmq->buffer;
1348	struct auxtrace_buffer *old_buffer = aux_buffer;
1349	struct auxtrace_queue *queue;
1350
1351	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1352
1353	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1354
1355	/* If no more data, drop the previous auxtrace_buffer and return */
1356	if (!aux_buffer) {
1357		if (old_buffer)
1358			auxtrace_buffer__drop_data(old_buffer);
1359		etmq->buf_len = 0;
1360		return 0;
1361	}
1362
1363	etmq->buffer = aux_buffer;
1364
1365	/* If the aux_buffer doesn't have data associated, try to load it */
1366	if (!aux_buffer->data) {
1367		/* get the file desc associated with the perf data file */
1368		int fd = perf_data__fd(etmq->etm->session->data);
1369
1370		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1371		if (!aux_buffer->data)
1372			return -ENOMEM;
1373	}
1374
1375	/* If valid, drop the previous buffer */
1376	if (old_buffer)
1377		auxtrace_buffer__drop_data(old_buffer);
1378
1379	etmq->buf_used = 0;
1380	etmq->buf_len = aux_buffer->size;
1381	etmq->buf = aux_buffer->data;
1382
1383	return etmq->buf_len;
1384}
1385
1386static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1387			       struct cs_etm_traceid_queue *tidq, pid_t tid,
1388			       ocsd_ex_level el)
1389{
1390	struct machine *machine = cs_etm__get_machine(etmq, el);
1391
1392	if (tid != -1) {
1393		thread__zput(tidq->thread);
1394		tidq->thread = machine__find_thread(machine, -1, tid);
1395	}
1396
1397	/* Couldn't find a known thread */
1398	if (!tidq->thread)
1399		tidq->thread = machine__idle_thread(machine);
1400
1401	tidq->el = el;
1402}
1403
1404int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1405			    u8 trace_chan_id, ocsd_ex_level el)
1406{
1407	struct cs_etm_traceid_queue *tidq;
1408
1409	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1410	if (!tidq)
1411		return -EINVAL;
1412
1413	cs_etm__set_thread(etmq, tidq, tid, el);
1414	return 0;
1415}
1416
1417bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1418{
1419	return !!etmq->etm->timeless_decoding;
1420}
1421
1422static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1423			      u64 trace_chan_id,
1424			      const struct cs_etm_packet *packet,
1425			      struct perf_sample *sample)
1426{
1427	/*
1428	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1429	 * packet, so directly bail out with 'insn_len' = 0.
1430	 */
1431	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1432		sample->insn_len = 0;
1433		return;
1434	}
1435
1436	/*
1437	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1438	 * cs_etm__t32_instr_size().
1439	 */
1440	if (packet->isa == CS_ETM_ISA_T32)
1441		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1442							  sample->ip);
	/* Otherwise, A64 and A32 instructions are always 32-bit. */
1444	else
1445		sample->insn_len = 4;
1446
1447	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1448			   (void *)sample->insn, 0);
1449}
1450
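/*
 * Convert a CoreSight timestamp to perf time when the trace carries virtual
 * (kernel) timestamps; otherwise return the raw CoreSight value.
 */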
1451u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1452{
1453	struct cs_etm_auxtrace *etm = etmq->etm;
1454
1455	if (etm->has_virtual_ts)
1456		return tsc_to_perf_time(cs_timestamp, &etm->tc);
1457	else
1458		return cs_timestamp;
1459}
1460
1461static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1462					       struct cs_etm_traceid_queue *tidq)
1463{
1464	struct cs_etm_auxtrace *etm = etmq->etm;
1465	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1466
1467	if (!etm->timeless_decoding && etm->has_virtual_ts)
1468		return packet_queue->cs_timestamp;
1469	else
1470		return etm->latest_kernel_timestamp;
1471}
1472
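/*
 * Synthesise a PERF_RECORD_SAMPLE for the instructions event at @addr,
 * covering @period instructions, and deliver it to the session.
 */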
1473static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1474					    struct cs_etm_traceid_queue *tidq,
1475					    u64 addr, u64 period)
1476{
1477	int ret = 0;
1478	struct cs_etm_auxtrace *etm = etmq->etm;
1479	union perf_event *event = tidq->event_buf;
1480	struct perf_sample sample = {.ip = 0,};
1481
1482	event->sample.header.type = PERF_RECORD_SAMPLE;
1483	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1484	event->sample.header.size = sizeof(struct perf_event_header);
1485
1486	/* Set time field based on etm auxtrace config. */
1487	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1488
1489	sample.ip = addr;
1490	sample.pid = thread__pid(tidq->thread);
1491	sample.tid = thread__tid(tidq->thread);
1492	sample.id = etmq->etm->instructions_id;
1493	sample.stream_id = etmq->etm->instructions_id;
1494	sample.period = period;
1495	sample.cpu = tidq->packet->cpu;
1496	sample.flags = tidq->prev_packet->flags;
1497	sample.cpumode = event->sample.header.misc;
1498
1499	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1500
1501	if (etm->synth_opts.last_branch)
1502		sample.branch_stack = tidq->last_branch;
1503
1504	if (etm->synth_opts.inject) {
1505		ret = cs_etm__inject_event(event, &sample,
1506					   etm->instructions_sample_type);
1507		if (ret)
1508			return ret;
1509	}
1510
1511	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1512
1513	if (ret)
1514		pr_err(
1515			"CS ETM Trace: failed to deliver instruction event, error %d\n",
1516			ret);
1517
1518	return ret;
1519}
1520
1521/*
1522 * The cs etm packet encodes an instruction range between a branch target
1523 * and the next taken branch. Generate sample accordingly.
1524 */
1525static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1526				       struct cs_etm_traceid_queue *tidq)
1527{
1528	int ret = 0;
1529	struct cs_etm_auxtrace *etm = etmq->etm;
1530	struct perf_sample sample = {.ip = 0,};
1531	union perf_event *event = tidq->event_buf;
1532	struct dummy_branch_stack {
1533		u64			nr;
1534		u64			hw_idx;
1535		struct branch_entry	entries;
1536	} dummy_bs;
1537	u64 ip;
1538
1539	ip = cs_etm__last_executed_instr(tidq->prev_packet);
1540
1541	event->sample.header.type = PERF_RECORD_SAMPLE;
1542	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1543						     tidq->prev_packet_el);
1544	event->sample.header.size = sizeof(struct perf_event_header);
1545
1546	/* Set time field based on etm auxtrace config. */
1547	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1548
1549	sample.ip = ip;
1550	sample.pid = thread__pid(tidq->prev_packet_thread);
1551	sample.tid = thread__tid(tidq->prev_packet_thread);
1552	sample.addr = cs_etm__first_executed_instr(tidq->packet);
1553	sample.id = etmq->etm->branches_id;
1554	sample.stream_id = etmq->etm->branches_id;
1555	sample.period = 1;
1556	sample.cpu = tidq->packet->cpu;
1557	sample.flags = tidq->prev_packet->flags;
1558	sample.cpumode = event->sample.header.misc;
1559
1560	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1561			  &sample);
1562
1563	/*
1564	 * perf report cannot handle events without a branch stack
1565	 */
1566	if (etm->synth_opts.last_branch) {
1567		dummy_bs = (struct dummy_branch_stack){
1568			.nr = 1,
1569			.hw_idx = -1ULL,
1570			.entries = {
1571				.from = sample.ip,
1572				.to = sample.addr,
1573			},
1574		};
1575		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1576	}
1577
1578	if (etm->synth_opts.inject) {
1579		ret = cs_etm__inject_event(event, &sample,
1580					   etm->branches_sample_type);
1581		if (ret)
1582			return ret;
1583	}
1584
1585	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1586
1587	if (ret)
1588		pr_err(
		"CS ETM Trace: failed to deliver branch event, error %d\n",
1590		ret);
1591
1592	return ret;
1593}
1594
1595struct cs_etm_synth {
1596	struct perf_tool dummy_tool;
1597	struct perf_session *session;
1598};
1599
1600static int cs_etm__event_synth(struct perf_tool *tool,
1601			       union perf_event *event,
1602			       struct perf_sample *sample __maybe_unused,
1603			       struct machine *machine __maybe_unused)
1604{
1605	struct cs_etm_synth *cs_etm_synth =
1606		      container_of(tool, struct cs_etm_synth, dummy_tool);
1607
1608	return perf_session__deliver_synth_event(cs_etm_synth->session,
1609						 event, NULL);
1610}
1611
1612static int cs_etm__synth_event(struct perf_session *session,
1613			       struct perf_event_attr *attr, u64 id)
1614{
1615	struct cs_etm_synth cs_etm_synth;
1616
1617	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
1618	cs_etm_synth.session = session;
1619
1620	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
1621					   &id, cs_etm__event_synth);
1622}
1623
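/*
 * Create the synthetic 'branches' and 'instructions' events requested by the
 * itrace options, deriving their attributes from the CoreSight evsel that was
 * used to record the trace.
 */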
1624static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1625				struct perf_session *session)
1626{
1627	struct evlist *evlist = session->evlist;
1628	struct evsel *evsel;
1629	struct perf_event_attr attr;
1630	bool found = false;
1631	u64 id;
1632	int err;
1633
1634	evlist__for_each_entry(evlist, evsel) {
1635		if (evsel->core.attr.type == etm->pmu_type) {
1636			found = true;
1637			break;
1638		}
1639	}
1640
1641	if (!found) {
1642		pr_debug("No selected events with CoreSight Trace data\n");
1643		return 0;
1644	}
1645
1646	memset(&attr, 0, sizeof(struct perf_event_attr));
1647	attr.size = sizeof(struct perf_event_attr);
1648	attr.type = PERF_TYPE_HARDWARE;
1649	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1650	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1651			    PERF_SAMPLE_PERIOD;
1652	if (etm->timeless_decoding)
1653		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1654	else
1655		attr.sample_type |= PERF_SAMPLE_TIME;
1656
1657	attr.exclude_user = evsel->core.attr.exclude_user;
1658	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1659	attr.exclude_hv = evsel->core.attr.exclude_hv;
1660	attr.exclude_host = evsel->core.attr.exclude_host;
1661	attr.exclude_guest = evsel->core.attr.exclude_guest;
1662	attr.sample_id_all = evsel->core.attr.sample_id_all;
1663	attr.read_format = evsel->core.attr.read_format;
1664
1665	/* create new id val to be a fixed offset from evsel id */
1666	id = evsel->core.id[0] + 1000000000;
1667
1668	if (!id)
1669		id = 1;
1670
1671	if (etm->synth_opts.branches) {
1672		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1673		attr.sample_period = 1;
1674		attr.sample_type |= PERF_SAMPLE_ADDR;
1675		err = cs_etm__synth_event(session, &attr, id);
1676		if (err)
1677			return err;
1678		etm->branches_sample_type = attr.sample_type;
1679		etm->branches_id = id;
1680		id += 1;
1681		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1682	}
1683
1684	if (etm->synth_opts.last_branch) {
1685		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1686		/*
1687		 * We don't use the hardware index, but the sample generation
1688		 * code uses the new format branch_stack with this field,
1689		 * so the event attributes must indicate that it's present.
1690		 */
1691		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1692	}
1693
1694	if (etm->synth_opts.instructions) {
1695		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1696		attr.sample_period = etm->synth_opts.period;
1697		etm->instructions_sample_period = attr.sample_period;
1698		err = cs_etm__synth_event(session, &attr, id);
1699		if (err)
1700			return err;
1701		etm->instructions_sample_type = attr.sample_type;
1702		etm->instructions_id = id;
1703		id += 1;
1704	}
1705
1706	return 0;
1707}
1708
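/*
 * Process the current packet for a traceID queue: update the last branch
 * buffer, emit periodic instruction samples and branch samples as configured,
 * then swap PACKET and PREV_PACKET for the next round.
 */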
1709static int cs_etm__sample(struct cs_etm_queue *etmq,
1710			  struct cs_etm_traceid_queue *tidq)
1711{
1712	struct cs_etm_auxtrace *etm = etmq->etm;
1713	int ret;
1714	u8 trace_chan_id = tidq->trace_chan_id;
1715	u64 instrs_prev;
1716
1717	/* Get instructions remainder from previous packet */
1718	instrs_prev = tidq->period_instructions;
1719
1720	tidq->period_instructions += tidq->packet->instr_count;
1721
1722	/*
1723	 * Record a branch when the last instruction in
1724	 * PREV_PACKET is a branch.
1725	 */
1726	if (etm->synth_opts.last_branch &&
1727	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1728	    tidq->prev_packet->last_instr_taken_branch)
1729		cs_etm__update_last_branch_rb(etmq, tidq);
1730
1731	if (etm->synth_opts.instructions &&
1732	    tidq->period_instructions >= etm->instructions_sample_period) {
1733		/*
1734		 * Emit instruction sample periodically
1735		 * TODO: allow period to be defined in cycles and clock time
1736		 */
1737
1738		/*
1739		 * Below diagram demonstrates the instruction samples
1740		 * generation flows:
1741		 *
1742		 *    Instrs     Instrs       Instrs       Instrs
1743		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
1744		 *    |            |            |            |
1745		 *    V            V            V            V
1746		 *   --------------------------------------------------
1747		 *            ^                                  ^
1748		 *            |                                  |
1749		 *         Period                             Period
1750		 *    instructions(Pi)                   instructions(Pi')
1751		 *
1752		 *            |                                  |
1753		 *            \---------------- -----------------/
1754		 *                             V
1755		 *                 tidq->packet->instr_count
1756		 *
1757		 * Instrs Sample(n...) are the synthesised samples occurring
1758		 * every etm->instructions_sample_period instructions - as
		 * defined on the perf command line.  Sample(n) is the last
		 * sample before the current etm packet; samples n+1 to n+3
		 * are generated from the current etm packet.
1762		 *
1763		 * tidq->packet->instr_count represents the number of
1764		 * instructions in the current etm packet.
1765		 *
1766		 * Period instructions (Pi) contains the number of
1767		 * instructions executed after the sample point(n) from the
1768		 * previous etm packet.  This will always be less than
1769		 * etm->instructions_sample_period.
1770		 *
		 * When new samples are generated, sample(n+1) combines
		 * instructions from two parts: the tail of the old packet and
		 * the head of the incoming packet.  Sample(n+2) and sample(n+3)
		 * each consume a full sample period of instructions.  After
		 * sample(n+3), the remaining instructions are carried over for
		 * the next packet and assigned to tidq->period_instructions for
		 * the next round of calculation.
1778		 */
1779
1780		/*
1781		 * Get the initial offset into the current packet instructions;
1782		 * entry conditions ensure that instrs_prev is less than
1783		 * etm->instructions_sample_period.
1784		 */
1785		u64 offset = etm->instructions_sample_period - instrs_prev;
1786		u64 addr;
1787
1788		/* Prepare last branches for instruction sample */
1789		if (etm->synth_opts.last_branch)
1790			cs_etm__copy_last_branch_rb(etmq, tidq);
1791
1792		while (tidq->period_instructions >=
1793				etm->instructions_sample_period) {
1794			/*
1795			 * Calculate the address of the sampled instruction (-1
1796			 * as sample is reported as though instruction has just
1797			 * been executed, but PC has not advanced to next
1798			 * instruction)
1799			 */
1800			addr = cs_etm__instr_addr(etmq, trace_chan_id,
1801						  tidq->packet, offset - 1);
1802			ret = cs_etm__synth_instruction_sample(
1803				etmq, tidq, addr,
1804				etm->instructions_sample_period);
1805			if (ret)
1806				return ret;
1807
1808			offset += etm->instructions_sample_period;
1809			tidq->period_instructions -=
1810				etm->instructions_sample_period;
1811		}
1812	}
1813
1814	if (etm->synth_opts.branches) {
1815		bool generate_sample = false;
1816
1817		/* Generate sample for tracing on packet */
1818		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1819			generate_sample = true;
1820
1821		/* Generate sample for branch taken packet */
1822		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1823		    tidq->prev_packet->last_instr_taken_branch)
1824			generate_sample = true;
1825
1826		if (generate_sample) {
1827			ret = cs_etm__synth_branch_sample(etmq, tidq);
1828			if (ret)
1829				return ret;
1830		}
1831	}
1832
1833	cs_etm__packet_swap(etm, tidq);
1834
1835	return 0;
1836}
1837
1838static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1839{
1840	/*
1841	 * When an exception packet is inserted, force
1842	 * 'prev_packet->last_instr_taken_branch' to true regardless of
1843	 * whether the last instruction in the previous range packet was a
1844	 * taken branch.  This ensures a branch sample is generated for the
1845	 * instruction range executed before the exception is taken or before
1846	 * the exception return.
1847	 *
1848	 * The exception packet has dummy address values, so don't swap PACKET
1849	 * with PREV_PACKET; keep PREV_PACKET for instruction and branch samples.
1850	 */
1851	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1852		tidq->prev_packet->last_instr_taken_branch = true;
1853
1854	return 0;
1855}
1856
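/*
 * cs_etm__flush: Synthesise any samples still pending for this traceID queue
 *		  (remaining last branch entries and the branch sample for the
 *		  previous range packet), then swap PACKET with PREV_PACKET and
 *		  reset the last branch ring buffer.  Called on a trace
 *		  discontinuity and when draining the traceID queues.
 */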
1857static int cs_etm__flush(struct cs_etm_queue *etmq,
1858			 struct cs_etm_traceid_queue *tidq)
1859{
1860	int err = 0;
1861	struct cs_etm_auxtrace *etm = etmq->etm;
1862
1863	/* Handle start tracing packet */
1864	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1865		goto swap_packet;
1866
1867	if (etmq->etm->synth_opts.last_branch &&
1868	    etmq->etm->synth_opts.instructions &&
1869	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1870		u64 addr;
1871
1872		/* Prepare last branches for instruction sample */
1873		cs_etm__copy_last_branch_rb(etmq, tidq);
1874
1875		/*
1876		 * Generate a last branch event for the branches left in the
1877		 * circular buffer at the end of the trace.
1878		 *
1879		 * Use the address of the end of the last reported execution
1880		 * range
1881		 */
1882		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1883
1884		err = cs_etm__synth_instruction_sample(
1885			etmq, tidq, addr,
1886			tidq->period_instructions);
1887		if (err)
1888			return err;
1889
1890		tidq->period_instructions = 0;
1891
1892	}
1893
1894	if (etm->synth_opts.branches &&
1895	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1896		err = cs_etm__synth_branch_sample(etmq, tidq);
1897		if (err)
1898			return err;
1899	}
1900
1901swap_packet:
1902	cs_etm__packet_swap(etm, tidq);
1903
1904	/* Reset last branches after flushing the trace */
1905	if (etm->synth_opts.last_branch)
1906		cs_etm__reset_last_branch_rb(tidq);
1907
1908	return err;
1909}
1910
1911static int cs_etm__end_block(struct cs_etm_queue *etmq,
1912			     struct cs_etm_traceid_queue *tidq)
1913{
1914	int err;
1915
1916	/*
1917	 * There is no new packet coming and 'etmq->packet' still contains the
1918	 * stale packet left over from the previous packet swap; skip
1919	 * generating a branch sample so that the stale packet is not used.
1920	 *
1921	 * In this case, only flush the branch stack and generate a last
1922	 * branch event for the branches left in the circular buffer at the
1923	 * end of the trace.
1924	 */
1925	if (etmq->etm->synth_opts.last_branch &&
1926	    etmq->etm->synth_opts.instructions &&
1927	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1928		u64 addr;
1929
1930		/* Prepare last branches for instruction sample */
1931		cs_etm__copy_last_branch_rb(etmq, tidq);
1932
1933		/*
1934		 * Use the address of the end of the last reported execution
1935		 * range.
1936		 */
1937		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1938
1939		err = cs_etm__synth_instruction_sample(
1940			etmq, tidq, addr,
1941			tidq->period_instructions);
1942		if (err)
1943			return err;
1944
1945		tidq->period_instructions = 0;
1946	}
1947
1948	return 0;
1949}
1950/*
1951 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1952 *			   if need be.
1953 * Returns:	< 0	if error
1954 *		= 0	if no more auxtrace_buffer to read
1955 *		> 0	if the current buffer isn't empty yet
1956 */
1957static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1958{
1959	int ret;
1960
1961	if (!etmq->buf_len) {
1962		ret = cs_etm__get_trace(etmq);
1963		if (ret <= 0)
1964			return ret;
1965		/*
1966		 * We cannot assume consecutive blocks in the data file
1967		 * are contiguous, reset the decoder to force re-sync.
1968		 */
1969		ret = cs_etm_decoder__reset(etmq->decoder);
1970		if (ret)
1971			return ret;
1972	}
1973
1974	return etmq->buf_len;
1975}
1976
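/*
 * cs_etm__is_svc_instr: Read back the instruction that ends at @end_addr and
 *			 check whether it is an SVC (system call) instruction
 *			 for the ISA recorded in @packet.
 */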
1977static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1978				 struct cs_etm_packet *packet,
1979				 u64 end_addr)
1980{
1981	/* Initialise to keep compiler happy */
1982	u16 instr16 = 0;
1983	u32 instr32 = 0;
1984	u64 addr;
1985
1986	switch (packet->isa) {
1987	case CS_ETM_ISA_T32:
1988		/*
1989		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
1990		 *
1991		 *  b'15         b'8
1992		 * +-----------------+--------+
1993		 * | 1 1 0 1 1 1 1 1 |  imm8  |
1994		 * +-----------------+--------+
1995		 *
1996		 * The specification only defines a 16-bit SVC encoding for T32
1997		 * and no 32-bit one, so only read 2 bytes below as the
1998		 * instruction size for T32.
1999		 */
2000		addr = end_addr - 2;
2001		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2002				   (u8 *)&instr16, 0);
2003		if ((instr16 & 0xFF00) == 0xDF00)
2004			return true;
2005
2006		break;
2007	case CS_ETM_ISA_A32:
2008		/*
2009		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2010		 *
2011		 *  b'31 b'28 b'27 b'24
2012		 * +---------+---------+-------------------------+
2013		 * |  !1111  | 1 1 1 1 |        imm24            |
2014		 * +---------+---------+-------------------------+
2015		 */
2016		addr = end_addr - 4;
2017		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2018				   (u8 *)&instr32, 0);
2019		if ((instr32 & 0x0F000000) == 0x0F000000 &&
2020		    (instr32 & 0xF0000000) != 0xF0000000)
2021			return true;
2022
2023		break;
2024	case CS_ETM_ISA_A64:
2025		/*
2026		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2027		 *
2028		 *  b'31               b'21           b'4     b'0
2029		 * +-----------------------+---------+-----------+
2030		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
2031		 * +-----------------------+---------+-----------+
2032		 */
2033		addr = end_addr - 4;
2034		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2035				   (u8 *)&instr32, 0);
2036		if ((instr32 & 0xFFE0001F) == 0xd4000001)
2037			return true;
2038
2039		break;
2040	case CS_ETM_ISA_UNKNOWN:
2041	default:
2042		break;
2043	}
2044
2045	return false;
2046}
2047
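/*
 * cs_etm__is_syscall: Return true if the current exception packet was raised
 *		       by an SVC (system call) instruction - either an ETMv3
 *		       SVC exception or an ETMv4 CALL exception whose preceding
 *		       instruction is an SVC.
 */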
2048static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2049			       struct cs_etm_traceid_queue *tidq, u64 magic)
2050{
2051	u8 trace_chan_id = tidq->trace_chan_id;
2052	struct cs_etm_packet *packet = tidq->packet;
2053	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2054
2055	if (magic == __perf_cs_etmv3_magic)
2056		if (packet->exception_number == CS_ETMV3_EXC_SVC)
2057			return true;
2058
2059	/*
2060	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2061	 * HVC cases; check whether it is an SVC instruction based on the
2062	 * packet address.
2063	 */
2064	if (magic == __perf_cs_etmv4_magic) {
2065		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2066		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2067					 prev_packet->end_addr))
2068			return true;
2069	}
2070
2071	return false;
2072}
2073
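/*
 * cs_etm__is_async_exception: Return true if the exception was asynchronous,
 *			       i.e. triggered by an external event such as an
 *			       interrupt, reset or debug halt rather than by
 *			       the instruction stream.
 */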
2074static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2075				       u64 magic)
2076{
2077	struct cs_etm_packet *packet = tidq->packet;
2078
2079	if (magic == __perf_cs_etmv3_magic)
2080		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2081		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2082		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2083		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
2084		    packet->exception_number == CS_ETMV3_EXC_FIQ)
2085			return true;
2086
2087	if (magic == __perf_cs_etmv4_magic)
2088		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2089		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2090		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2091		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2092		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2093		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
2094		    packet->exception_number == CS_ETMV4_EXC_FIQ)
2095			return true;
2096
2097	return false;
2098}
2099
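/*
 * cs_etm__is_sync_exception: Return true if the exception was synchronous,
 *			      i.e. raised directly by the instruction stream
 *			      (traps, faults, alignment errors, SMC/HVC calls
 *			      and implementation defined exceptions).
 */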
2100static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2101				      struct cs_etm_traceid_queue *tidq,
2102				      u64 magic)
2103{
2104	u8 trace_chan_id = tidq->trace_chan_id;
2105	struct cs_etm_packet *packet = tidq->packet;
2106	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2107
2108	if (magic == __perf_cs_etmv3_magic)
2109		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2110		    packet->exception_number == CS_ETMV3_EXC_HYP ||
2111		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2112		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2113		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2114		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2115		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
2116			return true;
2117
2118	if (magic == __perf_cs_etmv4_magic) {
2119		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2120		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2121		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2122		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2123			return true;
2124
2125		/*
2126		 * For CS_ETMV4_EXC_CALL, instructions other than SVC
2127		 * (i.e. SMC and HVC) are treated as sync exceptions.
2128		 */
2129		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2130		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2131					  prev_packet->end_addr))
2132			return true;
2133
2134		/*
2135		 * ETMv4 has 5 bits for the exception number; numbers in the
2136		 * range (CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END] are
2137		 * implementation-defined exceptions.
2138		 *
2139		 * In this case, simply treat them as sync exceptions.
2140		 */
2141		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2142		    packet->exception_number <= CS_ETMV4_EXC_END)
2143			return true;
2144	}
2145
2146	return false;
2147}
2148
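/*
 * cs_etm__set_sample_flags: Derive the perf branch flags (PERF_IP_FLAG_*) for
 *			     the current packet from the decoded instruction
 *			     and exception information, fixing up the previous
 *			     packet's flags where needed.
 */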
2149static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2150				    struct cs_etm_traceid_queue *tidq)
2151{
2152	struct cs_etm_packet *packet = tidq->packet;
2153	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2154	u8 trace_chan_id = tidq->trace_chan_id;
2155	u64 magic;
2156	int ret;
2157
2158	switch (packet->sample_type) {
2159	case CS_ETM_RANGE:
2160		/*
2161		 * Immediate branch instruction with neither link nor
2162		 * return flag: a normal branch instruction within
2163		 * the function.
2164		 */
2165		if (packet->last_instr_type == OCSD_INSTR_BR &&
2166		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2167			packet->flags = PERF_IP_FLAG_BRANCH;
2168
2169			if (packet->last_instr_cond)
2170				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2171		}
2172
2173		/*
2174		 * Immediate branch instruction with link (e.g. BL); this is a
2175		 * branch instruction for a function call.
2176		 */
2177		if (packet->last_instr_type == OCSD_INSTR_BR &&
2178		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2179			packet->flags = PERF_IP_FLAG_BRANCH |
2180					PERF_IP_FLAG_CALL;
2181
2182		/*
2183		 * Indirect branch instruction with link (e.g. BLR); this is a
2184		 * branch instruction for a function call.
2185		 */
2186		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2187		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2188			packet->flags = PERF_IP_FLAG_BRANCH |
2189					PERF_IP_FLAG_CALL;
2190
2191		/*
2192		 * Indirect branch instruction with subtype of
2193		 * OCSD_S_INSTR_V7_IMPLIED_RET; this is an explicit hint of a
2194		 * function return for A32/T32.
2195		 */
2196		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2197		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2198			packet->flags = PERF_IP_FLAG_BRANCH |
2199					PERF_IP_FLAG_RETURN;
2200
2201		/*
2202		 * Indirect branch instruction without link (e.g. BR), usually
2203		 * this is used for a function return, especially for
2204		 * functions within dynamically linked libraries.
2205		 */
2206		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2207		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2208			packet->flags = PERF_IP_FLAG_BRANCH |
2209					PERF_IP_FLAG_RETURN;
2210
2211		/* Return instruction for function return. */
2212		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2213		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2214			packet->flags = PERF_IP_FLAG_BRANCH |
2215					PERF_IP_FLAG_RETURN;
2216
2217		/*
2218		 * The decoder might insert a discontinuity in the middle of
2219		 * instruction packets; fix up prev_packet with the
2220		 * PERF_IP_FLAG_TRACE_BEGIN flag to indicate the trace restarted.
2221		 */
2222		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2223			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2224					      PERF_IP_FLAG_TRACE_BEGIN;
2225
2226		/*
2227		 * If the previous packet is an exception return packet
2228		 * and the return address directly follows an SVC instruction,
2229		 * adjust the previous packet's sample flags to
2230		 * PERF_IP_FLAG_SYSCALLRET.
2231		 */
2232		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2233					   PERF_IP_FLAG_RETURN |
2234					   PERF_IP_FLAG_INTERRUPT) &&
2235		    cs_etm__is_svc_instr(etmq, trace_chan_id,
2236					 packet, packet->start_addr))
2237			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2238					     PERF_IP_FLAG_RETURN |
2239					     PERF_IP_FLAG_SYSCALLRET;
2240		break;
2241	case CS_ETM_DISCONTINUITY:
2242		/*
2243		 * The trace is discontinuous; if the previous packet is an
2244		 * instruction range packet, set the PERF_IP_FLAG_TRACE_END
2245		 * flag on it.
2246		 */
2247		if (prev_packet->sample_type == CS_ETM_RANGE)
2248			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2249					      PERF_IP_FLAG_TRACE_END;
2250		break;
2251	case CS_ETM_EXCEPTION:
2252		ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
2253		if (ret)
2254			return ret;
2255
2256		/* The exception is for system call. */
2257		if (cs_etm__is_syscall(etmq, tidq, magic))
2258			packet->flags = PERF_IP_FLAG_BRANCH |
2259					PERF_IP_FLAG_CALL |
2260					PERF_IP_FLAG_SYSCALLRET;
2261		/*
2262		 * The exceptions are triggered by external signals from bus,
2263		 * interrupt controller, debug module, PE reset or halt.
2264		 */
2265		else if (cs_etm__is_async_exception(tidq, magic))
2266			packet->flags = PERF_IP_FLAG_BRANCH |
2267					PERF_IP_FLAG_CALL |
2268					PERF_IP_FLAG_ASYNC |
2269					PERF_IP_FLAG_INTERRUPT;
2270		/*
2271		 * Otherwise, exception is caused by trap, instruction &
2272		 * data fault, or alignment errors.
2273		 */
2274		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2275			packet->flags = PERF_IP_FLAG_BRANCH |
2276					PERF_IP_FLAG_CALL |
2277					PERF_IP_FLAG_INTERRUPT;
2278
2279		/*
2280		 * An exception packet is not used on its own to generate
2281		 * samples; it is affiliated with the previous instruction
2282		 * range packet.  So copy the flags to the previous range
2283		 * packet to tell perf that an exception-taken branch
2284		 * occurred there.
2285		 */
2286		if (prev_packet->sample_type == CS_ETM_RANGE)
2287			prev_packet->flags = packet->flags;
2288		break;
2289	case CS_ETM_EXCEPTION_RET:
2290		/*
2291		 * Like the exception packet, an exception return packet is not
2292		 * used on its own to generate samples; it is affiliated with
2293		 * the previous instruction range packet.  So set the previous
2294		 * range packet's flags to tell perf it is an exception return
2295		 * branch.
2296		 *
2297		 * The exception return can be for a system call or for any
2298		 * other exception type; unfortunately the packet carries no
2299		 * exception type information, so the type cannot be decided
2300		 * from the exception return packet alone.  Recording the
2301		 * exception number from the exception packet and reusing it
2302		 * for the exception return packet is not reliable either,
2303		 * because the trace can be discontinuous or the interrupts
2304		 * can be nested; in both cases the recorded exception number
2305		 * cannot be trusted for the exception return packet.
2306		 *
2307		 * For the exception return packet, we only need to distinguish
2308		 * whether it is for a system call or for another type.  That
2309		 * decision can be deferred until the next packet, which
2310		 * contains the return address; from the return address we can
2311		 * read back the previous instruction, check whether it is a
2312		 * system call instruction, and then adjust the sample flags
2313		 * as needed.
2314		 */
2315		if (prev_packet->sample_type == CS_ETM_RANGE)
2316			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2317					     PERF_IP_FLAG_RETURN |
2318					     PERF_IP_FLAG_INTERRUPT;
2319		break;
2320	case CS_ETM_EMPTY:
2321	default:
2322		break;
2323	}
2324
2325	return 0;
2326}
2327
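/*
 * cs_etm__decode_data_block: Feed the unprocessed part of the current data
 *			      block to the trace decoder and advance the buffer
 *			      offsets by the number of bytes consumed.
 */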
2328static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2329{
2330	int ret = 0;
2331	size_t processed = 0;
2332
2333	/*
2334	 * Packets are decoded and added to the decoder's packet queue
2335	 * until the decoder packet processing callback has requested that
2336	 * processing stops or there is nothing left in the buffer.  Normal
2337	 * operations that stop processing are a timestamp packet or a full
2338	 * decoder buffer queue.
2339	 */
2340	ret = cs_etm_decoder__process_data_block(etmq->decoder,
2341						 etmq->offset,
2342						 &etmq->buf[etmq->buf_used],
2343						 etmq->buf_len,
2344						 &processed);
2345	if (ret)
2346		goto out;
2347
2348	etmq->offset += processed;
2349	etmq->buf_used += processed;
2350	etmq->buf_len -= processed;
2351
2352out:
2353	return ret;
2354}
2355
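/*
 * cs_etm__process_traceid_queue: Pop decoded packets off a traceID queue one
 *				  by one, set their sample flags and synthesise
 *				  the corresponding samples until the queue is
 *				  empty or an error occurs.
 */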
2356static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2357					 struct cs_etm_traceid_queue *tidq)
2358{
2359	int ret;
2360	struct cs_etm_packet_queue *packet_queue;
2361
2362	packet_queue = &tidq->packet_queue;
2363
2364	/* Process each packet in this chunk */
2365	while (1) {
2366		ret = cs_etm_decoder__get_packet(packet_queue,
2367						 tidq->packet);
2368		if (ret <= 0)
2369			/*
2370			 * Stop processing this chunk on
2371			 * end of data or error
2372			 */
2373			break;
2374
2375		/*
2376		 * Packet addresses are swapped by the packet
2377		 * handling in the switch() statement below, so
2378		 * the sample flags must be set before the
2379		 * switch() statement in order to use the address
2380		 * information before the packets are swapped.
2381		 */
2382		ret = cs_etm__set_sample_flags(etmq, tidq);
2383		if (ret < 0)
2384			break;
2385
2386		switch (tidq->packet->sample_type) {
2387		case CS_ETM_RANGE:
2388			/*
2389			 * If the packet contains an instruction
2390			 * range, generate instruction sequence
2391			 * events.
2392			 */
2393			cs_etm__sample(etmq, tidq);
2394			break;
2395		case CS_ETM_EXCEPTION:
2396		case CS_ETM_EXCEPTION_RET:
2397			/*
2398			 * When an exception packet arrives,
2399			 * make sure the previous instruction
2400			 * range packet is handled properly.
2401			 */
2402			cs_etm__exception(tidq);
2403			break;
2404		case CS_ETM_DISCONTINUITY:
2405			/*
2406			 * Discontinuity in trace, flush
2407			 * previous branch stack
2408			 */
2409			cs_etm__flush(etmq, tidq);
2410			break;
2411		case CS_ETM_EMPTY:
2412			/*
2413			 * Should not receive empty packet,
2414			 * report error.
2415			 */
2416			pr_err("CS ETM Trace: empty packet\n");
2417			return -EINVAL;
2418		default:
2419			break;
2420		}
2421	}
2422
2423	return ret;
2424}
2425
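/*
 * cs_etm__clear_all_traceid_queues: Process and flush every traceID queue of
 *				     this etmq so that no decoded packets or
 *				     branch stack entries are left pending.
 */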
2426static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2427{
2428	int idx;
2429	struct int_node *inode;
2430	struct cs_etm_traceid_queue *tidq;
2431	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2432
2433	intlist__for_each_entry(inode, traceid_queues_list) {
2434		idx = (int)(intptr_t)inode->priv;
2435		tidq = etmq->traceid_queues[idx];
2436
2437		/* Ignore return value */
2438		cs_etm__process_traceid_queue(etmq, tidq);
2439
2440		/*
2441		 * Generate an instruction sample with the remaining
2442		 * branchstack entries.
2443		 */
2444		cs_etm__flush(etmq, tidq);
2445	}
2446}
2447
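/*
 * cs_etm__run_per_thread_timeless_decoder: Decode all the auxtrace buffers of
 *					    a queue in per-thread mode, where a
 *					    single traceID queue is used since
 *					    each thread has its own buffer.
 */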
2448static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2449{
2450	int err = 0;
2451	struct cs_etm_traceid_queue *tidq;
2452
2453	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2454	if (!tidq)
2455		return -EINVAL;
2456
2457	/* Go through each buffer in the queue and decode them one by one */
2458	while (1) {
2459		err = cs_etm__get_data_block(etmq);
2460		if (err <= 0)
2461			return err;
2462
2463		/* Run trace decoder until buffer consumed or end of trace */
2464		do {
2465			err = cs_etm__decode_data_block(etmq);
2466			if (err)
2467				return err;
2468
2469			/*
2470			 * Process each packet in this chunk, nothing to do if
2471			 * an error occurs other than hoping the next one will
2472			 * be better.
2473			 */
2474			err = cs_etm__process_traceid_queue(etmq, tidq);
2475
2476		} while (etmq->buf_len);
2477
2478		if (err == 0)
2479			/* Flush any remaining branch stack entries */
2480			err = cs_etm__end_block(etmq, tidq);
2481	}
2482
2483	return err;
2484}
2485
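/*
 * cs_etm__run_per_cpu_timeless_decoder: Decode all the auxtrace buffers of a
 *					 queue in per-cpu mode, iterating over
 *					 every traceID queue seen in the buffer
 *					 since one buffer may carry trace from
 *					 several trace channels.
 */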
2486static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2487{
2488	int idx, err = 0;
2489	struct cs_etm_traceid_queue *tidq;
2490	struct int_node *inode;
2491
2492	/* Go through each buffer in the queue and decode them one by one */
2493	while (1) {
2494		err = cs_etm__get_data_block(etmq);
2495		if (err <= 0)
2496			return err;
2497
2498		/* Run trace decoder until buffer consumed or end of trace */
2499		do {
2500			err = cs_etm__decode_data_block(etmq);
2501			if (err)
2502				return err;
2503
2504			/*
2505			 * cs_etm__run_per_thread_timeless_decoder() runs on a
2506			 * single traceID queue because each TID has a separate
2507			 * buffer. But here in per-cpu mode we need to iterate
2508			 * over each channel instead.
2509			 */
2510			intlist__for_each_entry(inode,
2511						etmq->traceid_queues_list) {
2512				idx = (int)(intptr_t)inode->priv;
2513				tidq = etmq->traceid_queues[idx];
2514				cs_etm__process_traceid_queue(etmq, tidq);
2515			}
2516		} while (etmq->buf_len);
2517
2518		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2519			idx = (int)(intptr_t)inode->priv;
2520			tidq = etmq->traceid_queues[idx];
2521			/* Flush any remaining branch stack entries */
2522			err = cs_etm__end_block(etmq, tidq);
2523			if (err)
2524				return err;
2525		}
2526	}
2527
2528	return err;
2529}
2530
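/*
 * cs_etm__process_timeless_queues: Run the timeless decoder over every queue,
 *				    in either per-thread or per-cpu mode.  A
 *				    @tid of -1 processes all threads.
 */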
2531static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2532					   pid_t tid)
2533{
2534	unsigned int i;
2535	struct auxtrace_queues *queues = &etm->queues;
2536
2537	for (i = 0; i < queues->nr_queues; i++) {
2538		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2539		struct cs_etm_queue *etmq = queue->priv;
2540		struct cs_etm_traceid_queue *tidq;
2541
2542		if (!etmq)
2543			continue;
2544
2545		if (etm->per_thread_decoding) {
2546			tidq = cs_etm__etmq_get_traceid_queue(
2547				etmq, CS_ETM_PER_THREAD_TRACEID);
2548
2549			if (!tidq)
2550				continue;
2551
2552			if (tid == -1 || thread__tid(tidq->thread) == tid)
2553				cs_etm__run_per_thread_timeless_decoder(etmq);
2554		} else
2555			cs_etm__run_per_cpu_timeless_decoder(etmq);
2556	}
2557
2558	return 0;
2559}
2560
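/*
 * cs_etm__process_timestamped_queues: Decode and synthesise samples from all
 *				       queues in time order, using a min heap
 *				       keyed on the next CoreSight timestamp
 *				       seen on each queue/traceID.
 */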
2561static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2562{
2563	int ret = 0;
2564	unsigned int cs_queue_nr, queue_nr, i;
2565	u8 trace_chan_id;
2566	u64 cs_timestamp;
2567	struct auxtrace_queue *queue;
2568	struct cs_etm_queue *etmq;
2569	struct cs_etm_traceid_queue *tidq;
2570
2571	/*
2572	 * Pre-populate the heap with one entry from each queue so that we can
2573	 * start processing in time order across all queues.
2574	 */
2575	for (i = 0; i < etm->queues.nr_queues; i++) {
2576		etmq = etm->queues.queue_array[i].priv;
2577		if (!etmq)
2578			continue;
2579
2580		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2581		if (ret)
2582			return ret;
2583	}
2584
2585	while (1) {
2586		if (!etm->heap.heap_cnt)
2587			goto out;
2588
2589		/* Take the entry at the top of the min heap */
2590		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2591		queue_nr = TO_QUEUE_NR(cs_queue_nr);
2592		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2593		queue = &etm->queues.queue_array[queue_nr];
2594		etmq = queue->priv;
2595
2596		/*
2597		 * Remove the top entry from the heap since we are about
2598		 * to process it.
2599		 */
2600		auxtrace_heap__pop(&etm->heap);
2601
2602		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2603		if (!tidq) {
2604			/*
2605			 * No traceID queue has been allocated for this traceID,
2606			 * other choice than to simply exit.
2607			 * other choice than simply exit.
2608			 */
2609			ret = -EINVAL;
2610			goto out;
2611		}
2612
2613		/*
2614		 * Packets associated with this timestamp are already in
2615		 * the etmq's traceID queue, so process them.
2616		 */
2617		ret = cs_etm__process_traceid_queue(etmq, tidq);
2618		if (ret < 0)
2619			goto out;
2620
2621		/*
2622		 * Packets for this timestamp have been processed, time to
2623		 * move on to the next timestamp, fetching a new auxtrace_buffer
2624		 * if need be.
2625		 */
2626refetch:
2627		ret = cs_etm__get_data_block(etmq);
2628		if (ret < 0)
2629			goto out;
2630
2631		/*
2632		 * No more auxtrace_buffers to process in this etmq, simply
2633		 * move on to another entry in the auxtrace_heap.
2634		 */
2635		if (!ret)
2636			continue;
2637
2638		ret = cs_etm__decode_data_block(etmq);
2639		if (ret)
2640			goto out;
2641
2642		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2643
2644		if (!cs_timestamp) {
2645			/*
2646			 * Function cs_etm__decode_data_block() returns when
2647			 * there is no more trace to decode in the current
2648			 * auxtrace_buffer OR when a timestamp has been
2649			 * encountered on any of the traceID queues.  Since we
2650			 * did not get a timestamp, there is no more trace to
2651			 * process in this auxtrace_buffer.  As such, empty and
2652			 * flush all traceID queues.
2653			 */
2654			cs_etm__clear_all_traceid_queues(etmq);
2655
2656			/* Fetch another auxtrace_buffer for this etmq */
2657			goto refetch;
2658		}
2659
2660		/*
2661		 * Add to the min heap the timestamp for packets that have
2662		 * just been decoded.  They will be processed and synthesized
2663		 * during the next call to cs_etm__process_traceid_queue() for
2664		 * this queue/traceID.
2665		 */
2666		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2667		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2668	}
2669
2670out:
2671	return ret;
2672}
2673
2674static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2675					union perf_event *event)
2676{
2677	struct thread *th;
2678
2679	if (etm->timeless_decoding)
2680		return 0;
2681
2682	/*
2683	 * contextID from the decoder. Only track threads for the host: only
2684	 * kernel trace is supported for guests, which therefore don't need
2685	 * PIDs, so this should be fine.
2686	 * be fine.
2687	 */
2688	th = machine__findnew_thread(&etm->session->machines.host,
2689				     event->itrace_start.pid,
2690				     event->itrace_start.tid);
2691	if (!th)
2692		return -ENOMEM;
2693
2694	thread__put(th);
2695
2696	return 0;
2697}
2698
2699static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2700					   union perf_event *event)
2701{
2702	struct thread *th;
2703	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2704
2705	/*
2706	 * Context switches in per-thread mode are irrelevant since perf
2707	 * will start/stop tracing as the process is scheduled.
2708	 */
2709	if (etm->timeless_decoding)
2710		return 0;
2711
2712	/*
2713	 * SWITCH_IN events carry the next process to be switched out while
2714	 * SWITCH_OUT events carry the process to be switched in.  As such
2715	 * we don't care about IN events.
2716	 */
2717	if (!out)
2718		return 0;
2719
2720	/*
2721	 * Add the tid/pid to the log so that we can get a match when we get a
2722	 * contextID from the decoder. Only track threads for the host: only
2723	 * kernel trace is supported for guests, which therefore don't need
2724	 * PIDs, so this should be fine.
2725	 */
2726	th = machine__findnew_thread(&etm->session->machines.host,
2727				     event->context_switch.next_prev_pid,
2728				     event->context_switch.next_prev_tid);
2729	if (!th)
2730		return -ENOMEM;
2731
2732	thread__put(th);
2733
2734	return 0;
2735}
2736
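/*
 * cs_etm__process_event: Main perf event callback.  Kicks off timeless
 *			  decoding on thread exit, tracks itrace start and
 *			  CPU-wide context switch events, and records the
 *			  latest kernel timestamp seen on AUX records.
 */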
2737static int cs_etm__process_event(struct perf_session *session,
2738				 union perf_event *event,
2739				 struct perf_sample *sample,
2740				 struct perf_tool *tool)
2741{
2742	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2743						   struct cs_etm_auxtrace,
2744						   auxtrace);
2745
2746	if (dump_trace)
2747		return 0;
2748
2749	if (!tool->ordered_events) {
2750		pr_err("CoreSight ETM Trace requires ordered events\n");
2751		return -EINVAL;
2752	}
2753
2754	switch (event->header.type) {
2755	case PERF_RECORD_EXIT:
2756		/*
2757		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2758		 * start the decode because we know there will be no more trace from
2759		 * this thread. All this does is emit samples earlier than waiting for
2760		 * the flush in other modes, but with timestamps it makes sense to wait
2761		 * for flush so that events from different threads are interleaved
2762		 * properly.
2763		 */
2764		if (etm->per_thread_decoding && etm->timeless_decoding)
2765			return cs_etm__process_timeless_queues(etm,
2766							       event->fork.tid);
2767		break;
2768
2769	case PERF_RECORD_ITRACE_START:
2770		return cs_etm__process_itrace_start(etm, event);
2771
2772	case PERF_RECORD_SWITCH_CPU_WIDE:
2773		return cs_etm__process_switch_cpu_wide(etm, event);
2774
2775	case PERF_RECORD_AUX:
2776		/*
2777		 * Record the latest kernel timestamp available in the header
2778		 * for samples so that synthesised samples occur from this point
2779		 * onwards.
2780		 */
2781		if (sample->time && (sample->time != (u64)-1))
2782			etm->latest_kernel_timestamp = sample->time;
2783		break;
2784
2785	default:
2786		break;
2787	}
2788
2789	return 0;
2790}
2791
2792static void dump_queued_data(struct cs_etm_auxtrace *etm,
2793			     struct perf_record_auxtrace *event)
2794{
2795	struct auxtrace_buffer *buf;
2796	unsigned int i;
2797	/*
2798	 * Find all buffers with same reference in the queues and dump them.
2799	 * This is because the queues can contain multiple entries of the same
2800	 * buffer that were split on aux records.
2801	 */
2802	for (i = 0; i < etm->queues.nr_queues; ++i)
2803		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2804			if (buf->reference == event->reference)
2805				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2806}
2807
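/*
 * cs_etm__process_auxtrace_event: If buffers were not queued up front (e.g.
 *				   piped input), queue and set up this auxtrace
 *				   buffer now; otherwise only dump the queued
 *				   data when dump_trace is set.
 */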
2808static int cs_etm__process_auxtrace_event(struct perf_session *session,
2809					  union perf_event *event,
2810					  struct perf_tool *tool __maybe_unused)
2811{
2812	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2813						   struct cs_etm_auxtrace,
2814						   auxtrace);
2815	if (!etm->data_queued) {
2816		struct auxtrace_buffer *buffer;
2817		off_t  data_offset;
2818		int fd = perf_data__fd(session->data);
2819		bool is_pipe = perf_data__is_pipe(session->data);
2820		int err;
2821		int idx = event->auxtrace.idx;
2822
2823		if (is_pipe)
2824			data_offset = 0;
2825		else {
2826			data_offset = lseek(fd, 0, SEEK_CUR);
2827			if (data_offset == -1)
2828				return -errno;
2829		}
2830
2831		err = auxtrace_queues__add_event(&etm->queues, session,
2832						 event, data_offset, &buffer);
2833		if (err)
2834			return err;
2835
2836		/*
2837		 * Knowing if the trace is formatted or not requires a lookup of
2838		 * the aux record so only works in non-piped mode where data is
2839		 * queued in cs_etm__queue_aux_records(). Always assume
2840		 * formatted in piped mode (true).
2841		 */
2842		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
2843					  idx, true, -1);
2844		if (err)
2845			return err;
2846
2847		if (dump_trace)
2848			if (auxtrace_buffer__get_data(buffer, fd)) {
2849				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2850				auxtrace_buffer__put_data(buffer);
2851			}
2852	} else if (dump_trace)
2853		dump_queued_data(etm, &event->auxtrace);
2854
2855	return 0;
2856}
2857
2858static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2859{
2860	struct evsel *evsel;
2861	struct evlist *evlist = etm->session->evlist;
2862
2863	/* Override timeless mode with user input from --itrace=Z */
2864	if (etm->synth_opts.timeless_decoding) {
2865		etm->timeless_decoding = true;
2866		return 0;
2867	}
2868
2869	/*
2870	 * Find the cs_etm evsel and look at what its timestamp setting was
2871	 */
2872	evlist__for_each_entry(evlist, evsel)
2873		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2874			etm->timeless_decoding =
2875				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
2876			return 0;
2877		}
2878
2879	pr_err("CS ETM: Couldn't find ETM evsel\n");
2880	return -EINVAL;
2881}
2882
2883/*
2884 * Read a single cpu parameter block from the auxtrace_info priv block.
2885 *
2886 * For version 1 there is a per-cpu nr_params entry. If we are handling a
2887 * version 1 file, then there may be fewer, the same, or more params
2888 * indicated by this value than the compile-time number we understand.
2889 *
2890 * For a version 0 info block, there are a fixed number, and we need to
2891 * fill out the nr_param value in the metadata we create.
2892 */
2893static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2894				    int out_blk_size, int nr_params_v0)
2895{
2896	u64 *metadata = NULL;
2897	int hdr_version;
2898	int nr_in_params, nr_out_params, nr_cmn_params;
2899	int i, k;
2900
2901	metadata = zalloc(sizeof(*metadata) * out_blk_size);
2902	if (!metadata)
2903		return NULL;
2904
2905	/* read block current index & version */
2906	i = *buff_in_offset;
2907	hdr_version = buff_in[CS_HEADER_VERSION];
2908
2909	if (!hdr_version) {
2910	/* read version 0 info block into a version 1 metadata block  */
2911		nr_in_params = nr_params_v0;
2912		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2913		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2914		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2915		/* remaining block params at offset +1 from source */
2916		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2917			metadata[k + 1] = buff_in[i + k];
2918		/* version 0 has 2 common params */
2919		nr_cmn_params = 2;
2920	} else {
2921	/* read version 1 info block - input and output nr_params may differ */
2922		/* version 1 has 3 common params */
2923		nr_cmn_params = 3;
2924		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2925
2926		/* if input has more params than output - skip excess */
2927		nr_out_params = nr_in_params + nr_cmn_params;
2928		if (nr_out_params > out_blk_size)
2929			nr_out_params = out_blk_size;
2930
2931		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2932			metadata[k] = buff_in[i + k];
2933
2934		/* record the actual nr params we copied */
2935		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2936	}
2937
2938	/* adjust in offset by number of in params used */
2939	i += nr_in_params + nr_cmn_params;
2940	*buff_in_offset = i;
2941	return metadata;
2942}
2943
2944/**
2945 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
2946 * on the bounds of aux_event, if it matches with the buffer that's at
2947 * file_offset.
2948 *
2949 * Normally, whole auxtrace buffers would be added to the queue. But we
2950 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
2951 * is reset across each buffer, so splitting the buffers up in advance has
2952 * the same effect.
2953 */
2954static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
2955				      struct perf_record_aux *aux_event, struct perf_sample *sample)
2956{
2957	int err;
2958	char buf[PERF_SAMPLE_MAX_SIZE];
2959	union perf_event *auxtrace_event_union;
2960	struct perf_record_auxtrace *auxtrace_event;
2961	union perf_event auxtrace_fragment;
2962	__u64 aux_offset, aux_size;
2963	__u32 idx;
2964	bool formatted;
2965
2966	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2967						   struct cs_etm_auxtrace,
2968						   auxtrace);
2969
2970	/*
2971	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
2972	 * from looping through the auxtrace index.
2973	 */
2974	err = perf_session__peek_event(session, file_offset, buf,
2975				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
2976	if (err)
2977		return err;
2978	auxtrace_event = &auxtrace_event_union->auxtrace;
2979	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
2980		return -EINVAL;
2981
2982	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
2983		auxtrace_event->header.size != sz) {
2984		return -EINVAL;
2985	}
2986
2987	/*
2988	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
2989	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
2990	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
2991	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
2992	 * Return 'not found' if mismatch.
2993	 */
2994	if (auxtrace_event->cpu == (__u32) -1) {
2995		etm->per_thread_decoding = true;
2996		if (auxtrace_event->tid != sample->tid)
2997			return 1;
2998	} else if (auxtrace_event->cpu != sample->cpu) {
2999		if (etm->per_thread_decoding) {
3000			/*
3001			 * Found a per-cpu buffer after a per-thread one was
3002			 * already found
3003			 */
3004			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3005			return -EINVAL;
3006		}
3007		return 1;
3008	}
3009
3010	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3011		/*
3012		 * Clamp size in snapshot mode. The buffer size is clamped in
3013		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3014		 * the buffer size.
3015		 */
3016		aux_size = min(aux_event->aux_size, auxtrace_event->size);
3017
3018		/*
3019		 * In this mode, the head also points to the end of the buffer, so aux_offset
3020		 * needs to have the size subtracted so that it points to the beginning, as in normal mode.
3021		 */
3022		aux_offset = aux_event->aux_offset - aux_size;
3023	} else {
3024		aux_size = aux_event->aux_size;
3025		aux_offset = aux_event->aux_offset;
3026	}
3027
3028	if (aux_offset >= auxtrace_event->offset &&
3029	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3030		/*
3031		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3032		 * based on the sizes of the aux event, and queue that fragment.
3033		 */
3034		auxtrace_fragment.auxtrace = *auxtrace_event;
3035		auxtrace_fragment.auxtrace.size = aux_size;
3036		auxtrace_fragment.auxtrace.offset = aux_offset;
3037		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3038
3039		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3040			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3041		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3042						 file_offset, NULL);
3043		if (err)
3044			return err;
3045
3046		idx = auxtrace_event->idx;
3047		formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
3048		return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
3049					   idx, formatted, sample->cpu);
3050	}
3051
3052	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3053	return 1;
3054}
3055
3056static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3057					u64 offset __maybe_unused, void *data __maybe_unused)
3058{
3059	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3060	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3061		(*(int *)data)++; /* increment found count */
3062		return cs_etm__process_aux_output_hw_id(session, event);
3063	}
3064	return 0;
3065}
3066
3067static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3068					u64 offset __maybe_unused, void *data __maybe_unused)
3069{
3070	struct perf_sample sample;
3071	int ret;
3072	struct auxtrace_index_entry *ent;
3073	struct auxtrace_index *auxtrace_index;
3074	struct evsel *evsel;
3075	size_t i;
3076
3077	/* Don't care about any other events, we're only queuing buffers for AUX events */
3078	if (event->header.type != PERF_RECORD_AUX)
3079		return 0;
3080
3081	if (event->header.size < sizeof(struct perf_record_aux))
3082		return -EINVAL;
3083
3084	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3085	if (!event->aux.aux_size)
3086		return 0;
3087
3088	/*
3089	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3090	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3091	 */
3092	evsel = evlist__event2evsel(session->evlist, event);
3093	if (!evsel)
3094		return -EINVAL;
3095	ret = evsel__parse_sample(evsel, event, &sample);
3096	if (ret)
3097		return ret;
3098
3099	/*
3100	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3101	 */
3102	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3103		for (i = 0; i < auxtrace_index->nr; i++) {
3104			ent = &auxtrace_index->entries[i];
3105			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3106							 ent->sz, &event->aux, &sample);
3107			/*
3108			 * Stop search on error or successful values. Continue search on
3109			 * 1 ('not found')
3110			 */
3111			if (ret != 1)
3112				return ret;
3113		}
3114	}
3115
3116	/*
3117	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3118	 * don't exit with an error because it will still be possible to decode other aux records.
3119	 */
3120	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3121	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3122	return 0;
3123}
3124
3125static int cs_etm__queue_aux_records(struct perf_session *session)
3126{
3127	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3128								struct auxtrace_index, list);
3129	if (index && index->nr > 0)
3130		return perf_session__peek_events(session, session->header.data_offset,
3131						 session->header.data_size,
3132						 cs_etm__queue_aux_records_cb, NULL);
3133
3134	/*
3135	 * We would get here if there are no entries in the index (either no auxtrace
3136	 * buffers or no index at all). Fail silently as there is the possibility of
3137	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3138	 * false.
3139	 *
3140	 * In that scenario, buffers will not be split by AUX records.
3141	 */
3142	return 0;
3143}
3144
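/*
 * Despite its name, HAS_PARAM() evaluates to true when the metadata block for
 * cpu 'j' does *not* extend far enough to contain the given parameter, i.e.
 * the recorded number of trace parameters stops before that field.
 */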
3145#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3146				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3147
3148/*
3149 * Loop through the ETMs and return false if we find at least one where ts_source != 1
3150 * (i.e. virtual timestamps are not in use).
3151 */
3152static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3153{
3154	int j;
3155
3156	for (j = 0; j < num_cpu; j++) {
3157		switch (metadata[j][CS_ETM_MAGIC]) {
3158		case __perf_cs_etmv4_magic:
3159			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3160				return false;
3161			break;
3162		case __perf_cs_ete_magic:
3163			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3164				return false;
3165			break;
3166		default:
3167			/* Unknown / unsupported magic number. */
3168			return false;
3169		}
3170	}
3171	return true;
3172}
3173
3174/* map trace ids to correct metadata block, from information in metadata */
3175static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
3176{
3177	u64 cs_etm_magic;
3178	u8 trace_chan_id;
3179	int i, err;
3180
3181	for (i = 0; i < num_cpu; i++) {
3182		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3183		switch (cs_etm_magic) {
3184		case __perf_cs_etmv3_magic:
3185			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3186			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3187			break;
3188		case __perf_cs_etmv4_magic:
3189		case __perf_cs_ete_magic:
3190			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3191			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3192			break;
3193		default:
3194			/* unknown magic number */
3195			return -EINVAL;
3196		}
3197		err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
3198		if (err)
3199			return err;
3200	}
3201	return 0;
3202}
3203
3204/*
3205 * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
3206 * unused value to reduce the number of unneeded decoders created.
3207 */
3208static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
3209{
3210	u64 cs_etm_magic;
3211	int i;
3212
3213	for (i = 0; i < num_cpu; i++) {
3214		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3215		switch (cs_etm_magic) {
3216		case __perf_cs_etmv3_magic:
3217			if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3218				metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3219			break;
3220		case __perf_cs_etmv4_magic:
3221		case __perf_cs_ete_magic:
3222			if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3223				metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3224			break;
3225		default:
3226			/* unknown magic number */
3227			return -EINVAL;
3228		}
3229	}
3230	return 0;
3231}
3232
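/*
 * cs_etm__process_auxtrace_info_full: Parse the CoreSight AUXTRACE_INFO
 *				       record: build the per-cpu metadata
 *				       blocks, set up the cs_etm_auxtrace
 *				       instance and its auxtrace callbacks,
 *				       map trace IDs to metadata and queue the
 *				       recorded aux buffers for decoding.
 */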
3233int cs_etm__process_auxtrace_info_full(union perf_event *event,
3234				       struct perf_session *session)
3235{
3236	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3237	struct cs_etm_auxtrace *etm = NULL;
3238	struct perf_record_time_conv *tc = &session->time_conv;
3239	int event_header_size = sizeof(struct perf_event_header);
3240	int total_size = auxtrace_info->header.size;
3241	int priv_size = 0;
3242	int num_cpu;
3243	int err = 0;
3244	int aux_hw_id_found;
3245	int i, j;
3246	u64 *ptr = NULL;
3247	u64 **metadata = NULL;
3248
3249	/*
3250	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
3251	 * has to be made for each packet that gets decoded, optimizing access
3252	 * with anything better than a sequential array is worth doing.
3253	 */
3254	traceid_list = intlist__new(NULL);
3255	if (!traceid_list)
3256		return -ENOMEM;
3257
3258	/* First the global part */
3259	ptr = (u64 *) auxtrace_info->priv;
3260	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3261	metadata = zalloc(sizeof(*metadata) * num_cpu);
3262	if (!metadata) {
3263		err = -ENOMEM;
3264		goto err_free_traceid_list;
3265	}
3266
3267	/* Start parsing after the common part of the header */
3268	i = CS_HEADER_VERSION_MAX;
3269
3270	/*
3271	 * The metadata is stored in the auxtrace_info section and encodes
3272	 * the configuration of the ARM embedded trace macrocell which is
3273	 * required by the trace decoder to properly decode the trace due
3274	 * to its highly compressed nature.
3275	 */
3276	for (j = 0; j < num_cpu; j++) {
3277		if (ptr[i] == __perf_cs_etmv3_magic) {
3278			metadata[j] =
3279				cs_etm__create_meta_blk(ptr, &i,
3280							CS_ETM_PRIV_MAX,
3281							CS_ETM_NR_TRC_PARAMS_V0);
3282		} else if (ptr[i] == __perf_cs_etmv4_magic) {
3283			metadata[j] =
3284				cs_etm__create_meta_blk(ptr, &i,
3285							CS_ETMV4_PRIV_MAX,
3286							CS_ETMV4_NR_TRC_PARAMS_V0);
3287		} else if (ptr[i] == __perf_cs_ete_magic) {
3288			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3289		} else {
3290			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3291				  ptr[i]);
3292			err = -EINVAL;
3293			goto err_free_metadata;
3294		}
3295
3296		if (!metadata[j]) {
3297			err = -ENOMEM;
3298			goto err_free_metadata;
3299		}
3300	}
3301
3302	/*
3303	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3304	 * CS_ETMV4_PRIV_MAX marks how many double words are in the
3305	 * global metadata, and each cpu's metadata respectively.
3306	 * The following tests if the correct number of double words was
3307	 * present in the auxtrace info section.
3308	 */
3309	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3310	if (i * 8 != priv_size) {
3311		err = -EINVAL;
3312		goto err_free_metadata;
3313	}
3314
3315	etm = zalloc(sizeof(*etm));
3316
3317	if (!etm) {
3318		err = -ENOMEM;
3319		goto err_free_metadata;
3320	}
3321
3322	/*
3323	 * have the same PID format across CPUs.  So cache the PID format
3324	 * have the same PID format crossing CPUs.  So cache the PID format
3325	 * and reuse it for sequential decoding.
3326	 */
3327	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3328
3329	err = auxtrace_queues__init(&etm->queues);
3330	if (err)
3331		goto err_free_etm;
3332
3333	if (session->itrace_synth_opts->set) {
3334		etm->synth_opts = *session->itrace_synth_opts;
3335	} else {
3336		itrace_synth_opts__set_default(&etm->synth_opts,
3337				session->itrace_synth_opts->default_no_sample);
3338		etm->synth_opts.callchain = false;
3339	}
3340
3341	etm->session = session;
3342
3343	etm->num_cpu = num_cpu;
3344	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3345	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3346	etm->metadata = metadata;
3347	etm->auxtrace_type = auxtrace_info->type;
3348
3349	if (etm->synth_opts.use_timestamp)
3350		/*
3351		 * Prior to Armv8.4, Arm CPUs don't support the FEAT_TRF
3352		 * feature, so the decoder cannot know whether the traced
3353		 * timestamp is the same as the kernel time.
3354		 *
3355		 * A user with knowledge of the platform can specify the
3356		 * itrace option 'T' to tell the decoder to force the use of
3357		 * the traced timestamp as the kernel time.
3358		 */
3359		etm->has_virtual_ts = true;
3360	else
3361		/* Use virtual timestamps if all ETMs report ts_source = 1 */
3362		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3363
3364	if (!etm->has_virtual_ts)
3365		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3366			    "The time field of the samples will not be set accurately.\n"
3367			    "For Arm CPUs prior to Armv8.4 or without FEAT_TRF support,\n"
3368			    "you can specify the itrace option 'T' for timestamp decoding\n"
3369			    "if the Coresight timestamp on the platform is the same as the kernel time.\n\n");
3370
3371	etm->auxtrace.process_event = cs_etm__process_event;
3372	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3373	etm->auxtrace.flush_events = cs_etm__flush_events;
3374	etm->auxtrace.free_events = cs_etm__free_events;
3375	etm->auxtrace.free = cs_etm__free;
3376	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3377	session->auxtrace = &etm->auxtrace;
3378
3379	err = cs_etm__setup_timeless_decoding(etm);
3380	if (err)
3381		return err;
3382
3383	etm->tc.time_shift = tc->time_shift;
3384	etm->tc.time_mult = tc->time_mult;
3385	etm->tc.time_zero = tc->time_zero;
3386	if (event_contains(*tc, time_cycles)) {
3387		etm->tc.time_cycles = tc->time_cycles;
3388		etm->tc.time_mask = tc->time_mask;
3389		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3390		etm->tc.cap_user_time_short = tc->cap_user_time_short;
3391	}
3392	err = cs_etm__synth_events(etm, session);
3393	if (err)
3394		goto err_free_queues;
3395
3396	/*
3397	 * Map Trace ID values to CPU metadata.
3398	 *
3399	 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
3400	 * file has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
3401	 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
3402	 *
3403	 * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
3404	 * the same IDs as the old algorithm as far as possible, unless there are clashes,
3405	 * in which case a different value will be used. This means an older perf may still
3406	 * be able to record and read files generated on a newer system.
3407	 *
3408	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3409	 * those packets. If they are there then the values will be mapped and plugged into
3410	 * the metadata. We then set any remaining metadata values with the used flag to a
3411	 * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
3412	 *
3413	 * If no AUX_HW_ID packets are present - which means the file was recorded on an old
3414	 * kernel - then we map Trace ID values to CPUs directly from the metadata, clearing any
3415	 * unused flags if present.
3416	 */
3417
3418	/* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3419	aux_hw_id_found = 0;
3420	err = perf_session__peek_events(session, session->header.data_offset,
3421					session->header.data_size,
3422					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3423	if (err)
3424		goto err_free_queues;
3425
3426	/* if HW ID found then clear any unused metadata ID values */
3427	if (aux_hw_id_found)
3428		err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
3429	/* otherwise, this is a file with metadata values only, map from metadata */
3430	else
3431		err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
3432
3433	if (err)
3434		goto err_free_queues;
3435
3436	err = cs_etm__queue_aux_records(session);
3437	if (err)
3438		goto err_free_queues;
3439
3440	etm->data_queued = etm->queues.populated;
3441	return 0;
3442
3443err_free_queues:
3444	auxtrace_queues__free(&etm->queues);
3445	session->auxtrace = NULL;
3446err_free_etm:
3447	zfree(&etm);
3448err_free_metadata:
3449	/* No need to check @metadata[j], free(NULL) is supported */
3450	for (j = 0; j < num_cpu; j++)
3451		zfree(&metadata[j]);
3452	zfree(&metadata);
3453err_free_traceid_list:
3454	intlist__delete(traceid_list);
3455	return err;
3456}
3457