1// SPDX-License-Identifier: GPL-2.0
2
3#include <subcmd/parse-options.h>
4#include <linux/hw_breakpoint.h>
5#include <linux/perf_event.h>
6#include <linux/time64.h>
7#include <sys/syscall.h>
8#include <sys/ioctl.h>
9#include <sys/time.h>
10#include <pthread.h>
11#include <stddef.h>
12#include <stdlib.h>
13#include <unistd.h>
14#include <stdio.h>
15#include <errno.h>
16#include "bench.h"
17#include "futex.h"
18
// Tunables for "perf bench breakpoint thread". Every field defaults to 1;
// thread_options hands out the address of each field to the option parser.
// Kept file-local (static) so the name cannot clash with other objects.
static struct {
	unsigned int nbreakpoints;	// breakpoints opened before the timed section
	unsigned int nparallel;		// concurrent breakpoint_thread() workers
	unsigned int nthreads;		// threads created and joined per batch
} thread_params = {
	.nbreakpoints = 1,
	.nparallel = 1,
	.nthreads = 1,
};
28
29static const struct option thread_options[] = {
30	OPT_UINTEGER('b', "breakpoints", &thread_params.nbreakpoints,
31		"Specify amount of breakpoints"),
32	OPT_UINTEGER('p', "parallelism", &thread_params.nparallel, "Specify amount of parallelism"),
33	OPT_UINTEGER('t', "threads", &thread_params.nthreads, "Specify amount of threads"),
34	OPT_END()
35};
36
// NULL-terminated usage lines passed to parse_options()/usage_with_options()
// by bench_breakpoint_thread().
static const char * const thread_usage[] = {
	"perf bench breakpoint thread <options>",
	NULL,
};
41
// One breakpoint used by the thread benchmark: the perf event descriptor
// and the byte whose address is handed to breakpoint_setup().
struct breakpoint {
	int fd;		// value returned by breakpoint_setup()
	char watched;	// storage whose address is watched
};
46
// Open a PERF_TYPE_BREAKPOINT event of type HW_BREAKPOINT_RW and length
// HW_BREAKPOINT_LEN_1 at addr. The attribute has inherit, exclude_kernel
// and exclude_hv set; perf_event_open is invoked with pid 0, cpu -1,
// group_fd -1 and flags 0.
//
// Returns the file descriptor on success, or -errno when the syscall fails.
static int breakpoint_setup(void *addr)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_BREAKPOINT,
		.size = sizeof(attr),
		.inherit = 1,
		.exclude_kernel = 1,
		.exclude_hv = 1,
		.bp_addr = (unsigned long)addr,
		.bp_type = HW_BREAKPOINT_RW,
		.bp_len = HW_BREAKPOINT_LEN_1,
	};
	int fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);

	return fd < 0 ? -errno : fd;
}
67
// Thread body: arg points to an unsigned int flag. Keeps calling
// futex_wait() on the flag (expected value 0, no timeout) until a relaxed
// atomic load observes it non-zero, then returns NULL.
static void *passive_thread(void *arg)
{
	unsigned int *flag = arg;

	for (;;) {
		if (__atomic_load_n(flag, __ATOMIC_RELAXED))
			break;
		futex_wait(flag, 0, NULL, 0);
	}
	return NULL;
}
76
// Thread body: arg points to an unsigned int flag. Busy-spins on relaxed
// atomic loads of the flag and returns NULL once it reads non-zero.
static void *active_thread(void *arg)
{
	unsigned int *flag = arg;

	for (;;) {
		if (__atomic_load_n(flag, __ATOMIC_RELAXED))
			return NULL;
	}
}
84
85static void *breakpoint_thread(void *arg)
86{
87	unsigned int i, done;
88	int *repeat = (int *)arg;
89	pthread_t *threads;
90
91	threads = calloc(thread_params.nthreads, sizeof(threads[0]));
92	if (!threads)
93		exit((perror("calloc"), EXIT_FAILURE));
94
95	while (__atomic_fetch_sub(repeat, 1, __ATOMIC_RELAXED) > 0) {
96		done = 0;
97		for (i = 0; i < thread_params.nthreads; i++) {
98			if (pthread_create(&threads[i], NULL, passive_thread, &done))
99				exit((perror("pthread_create"), EXIT_FAILURE));
100		}
101		__atomic_store_n(&done, 1, __ATOMIC_RELAXED);
102		futex_wake(&done, thread_params.nthreads, 0);
103		for (i = 0; i < thread_params.nthreads; i++)
104			pthread_join(threads[i], NULL);
105	}
106	free(threads);
107	return NULL;
108}
109
110// The benchmark creates nbreakpoints inheritable breakpoints,
111// then starts nparallel threads which create and join bench_repeat batches of nthreads threads.
112int bench_breakpoint_thread(int argc, const char **argv)
113{
114	unsigned int i, result_usec;
115	int repeat = bench_repeat;
116	struct breakpoint *breakpoints;
117	pthread_t *parallel;
118	struct timeval start, stop, diff;
119
120	if (parse_options(argc, argv, thread_options, thread_usage, 0)) {
121		usage_with_options(thread_usage, thread_options);
122		exit(EXIT_FAILURE);
123	}
124	breakpoints = calloc(thread_params.nbreakpoints, sizeof(breakpoints[0]));
125	parallel = calloc(thread_params.nparallel, sizeof(parallel[0]));
126	if (!breakpoints || !parallel)
127		exit((perror("calloc"), EXIT_FAILURE));
128
129	for (i = 0; i < thread_params.nbreakpoints; i++) {
130		breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched);
131
132		if (breakpoints[i].fd < 0) {
133			if (breakpoints[i].fd == -ENODEV) {
134				printf("Skipping perf bench breakpoint thread: No hardware support\n");
135				return 0;
136			}
137			exit((perror("perf_event_open"), EXIT_FAILURE));
138		}
139	}
140	gettimeofday(&start, NULL);
141	for (i = 0; i < thread_params.nparallel; i++) {
142		if (pthread_create(&parallel[i], NULL, breakpoint_thread, &repeat))
143			exit((perror("pthread_create"), EXIT_FAILURE));
144	}
145	for (i = 0; i < thread_params.nparallel; i++)
146		pthread_join(parallel[i], NULL);
147	gettimeofday(&stop, NULL);
148	timersub(&stop, &start, &diff);
149	for (i = 0; i < thread_params.nbreakpoints; i++)
150		close(breakpoints[i].fd);
151	free(parallel);
152	free(breakpoints);
153	switch (bench_format) {
154	case BENCH_FORMAT_DEFAULT:
155		printf("# Created/joined %d threads with %d breakpoints and %d parallelism\n",
156			bench_repeat, thread_params.nbreakpoints, thread_params.nparallel);
157		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
158			(long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
159		result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
160		printf(" %14lf usecs/op\n",
161			(double)result_usec / bench_repeat / thread_params.nthreads);
162		printf(" %14lf usecs/op/cpu\n",
163			(double)result_usec / bench_repeat /
164			thread_params.nthreads * thread_params.nparallel);
165		break;
166	case BENCH_FORMAT_SIMPLE:
167		printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
168		break;
169	default:
170		fprintf(stderr, "Unknown format: %d\n", bench_format);
171		exit(EXIT_FAILURE);
172	}
173	return 0;
174}
175
// Tunables for "perf bench breakpoint enable". Both counts default to 0;
// enable_options hands out the address of each field to the option parser.
// Kept file-local (static) so the name cannot clash with other objects.
static struct {
	unsigned int npassive;	// threads started with passive_thread()
	unsigned int nactive;	// threads started with active_thread()
} enable_params = {
	.npassive = 0,
	.nactive = 0,
};
183
184static const struct option enable_options[] = {
185	OPT_UINTEGER('p', "passive", &enable_params.npassive, "Specify amount of passive threads"),
186	OPT_UINTEGER('a', "active", &enable_params.nactive, "Specify amount of active threads"),
187	OPT_END()
188};
189
// NULL-terminated usage lines passed to parse_options()/usage_with_options()
// by bench_breakpoint_enable().
static const char * const enable_usage[] = {
	"perf bench breakpoint enable <options>",
	NULL,
};
194
195// The benchmark creates an inheritable breakpoint,
196// then starts npassive threads that block and nactive threads that actively spin
197// and then disables and enables the breakpoint bench_repeat times.
198int bench_breakpoint_enable(int argc, const char **argv)
199{
200	unsigned int i, nthreads, result_usec, done = 0;
201	char watched;
202	int fd;
203	pthread_t *threads;
204	struct timeval start, stop, diff;
205
206	if (parse_options(argc, argv, enable_options, enable_usage, 0)) {
207		usage_with_options(enable_usage, enable_options);
208		exit(EXIT_FAILURE);
209	}
210	fd = breakpoint_setup(&watched);
211
212	if (fd < 0) {
213		if (fd == -ENODEV) {
214			printf("Skipping perf bench breakpoint enable: No hardware support\n");
215			return 0;
216		}
217		exit((perror("perf_event_open"), EXIT_FAILURE));
218	}
219	nthreads = enable_params.npassive + enable_params.nactive;
220	threads = calloc(nthreads, sizeof(threads[0]));
221	if (!threads)
222		exit((perror("calloc"), EXIT_FAILURE));
223
224	for (i = 0; i < nthreads; i++) {
225		if (pthread_create(&threads[i], NULL,
226			i < enable_params.npassive ? passive_thread : active_thread, &done))
227			exit((perror("pthread_create"), EXIT_FAILURE));
228	}
229	usleep(10000);  // let the threads block
230	gettimeofday(&start, NULL);
231	for (i = 0; i < bench_repeat; i++) {
232		if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0))
233			exit((perror("ioctl(PERF_EVENT_IOC_DISABLE)"), EXIT_FAILURE));
234		if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0))
235			exit((perror("ioctl(PERF_EVENT_IOC_ENABLE)"), EXIT_FAILURE));
236	}
237	gettimeofday(&stop, NULL);
238	timersub(&stop, &start, &diff);
239	__atomic_store_n(&done, 1, __ATOMIC_RELAXED);
240	futex_wake(&done, enable_params.npassive, 0);
241	for (i = 0; i < nthreads; i++)
242		pthread_join(threads[i], NULL);
243	free(threads);
244	close(fd);
245	switch (bench_format) {
246	case BENCH_FORMAT_DEFAULT:
247		printf("# Enabled/disabled breakpoint %d time with %d passive and %d active threads\n",
248			bench_repeat, enable_params.npassive, enable_params.nactive);
249		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
250			(long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
251		result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
252		printf(" %14lf usecs/op\n", (double)result_usec / bench_repeat);
253		break;
254	case BENCH_FORMAT_SIMPLE:
255		printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC));
256		break;
257	default:
258		fprintf(stderr, "Unknown format: %d\n", bench_format);
259		exit(EXIT_FAILURE);
260	}
261	return 0;
262}
263