// SPDX-License-Identifier: GPL-2.0
/*
 * fill_buf benchmark
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 *    Fenghua Yu <fenghua.yu@intel.com>
 */
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <inttypes.h>
#include <string.h>

#include "resctrl.h"

#define CL_SIZE			(64)
#define PAGE_SIZE		(4 * 1024)
#define MB			(1024 * 1024)

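/*
 * Store fence, sfence on x86 and a no-op on other architectures. Used by
 * mem_flush() to make sure the cache line flushes issued before it have
 * completed.
 */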
static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("sfence\n\t"
		     : : : "memory");
#endif
}

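/*
 * Flush the cache line containing address @p from all cache levels
 * (clflush on x86, no-op on other architectures).
 */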
static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("clflush (%0)\n\t"
		     : : "r"(p) : "memory");
#endif
}

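/*
 * mem_flush - flush @buf_size bytes at @buf from the CPU caches, one cache
 * line at a time, then fence so that all the flushes have completed.
 */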
void mem_flush(unsigned char *buf, size_t buf_size)
{
	unsigned char *cp = buf;
	size_t i = 0;

	buf_size = buf_size / CL_SIZE; /* mem size in cache lines */

	for (i = 0; i < buf_size; i++)
		cl_flush(&cp[i * CL_SIZE]);

	sb();
}

/*
 * Buffer index step advance to work around HW prefetching interfering with
 * the measurements.
 *
 * Must be a prime to step through all indexes of the buffer.
 *
 * Some primes work better than others on some architectures (from MBA/MBM
 * result stability point of view).
 */
#define FILL_IDX_MULT	23

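/*
 * Example: for a hypothetical span of 10 half cache lines, the loop in
 * fill_one_span_read() below visits index 0, 3, 6, 9, 2, 5, 8, 1, 4, 7,
 * i.e. every half cache line exactly once, but not in sequential order.
 */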
static int fill_one_span_read(unsigned char *buf, size_t buf_size)
{
	unsigned int size = buf_size / (CL_SIZE / 2);
	unsigned int i, idx = 0;
	unsigned char sum = 0;

	/*
	 * Read the buffer in an order that is unexpected by HW prefetching
	 * optimizations to prevent them interfering with the caching pattern.
	 *
	 * The read order is (in terms of halves of cachelines):
	 *	i * FILL_IDX_MULT % size
	 * The formula is open-coded below to avoid modulo inside the loop
	 * as it improves MBA/MBM result stability on some architectures.
	 */
	for (i = 0; i < size; i++) {
		sum += buf[idx * (CL_SIZE / 2)];

		idx += FILL_IDX_MULT;
		while (idx >= size)
			idx -= size;
	}

	return sum;
}

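/* Write to every half cache line of @buf once, from start to end. */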
static void fill_one_span_write(unsigned char *buf, size_t buf_size)
{
	unsigned char *end_ptr = buf + buf_size;
	unsigned char *p;

	p = buf;
	while (p < end_ptr) {
		*p = '1';
		p += (CL_SIZE / 2);
	}
}

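/*
 * Read the whole buffer once, or keep re-reading it indefinitely when
 * @once is false.
 */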
void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
{
	int ret = 0;

	while (1) {
		ret = fill_one_span_read(buf, buf_size);
		if (once)
			break;
	}

	/* Consume read result so that reading memory is not optimized out. */
	*value_sink = ret;
}

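/*
 * Write the whole buffer once, or keep rewriting it indefinitely when
 * @once is false.
 */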
static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once)
{
	while (1) {
		fill_one_span_write(buf, buf_size);
		if (once)
			break;
	}
}

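/*
 * alloc_buffer - allocate a page-aligned buffer of @buf_size bytes, write a
 * random value into each cache line, and flush the buffer from the caches
 * when @memflush is non-zero.
 */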
unsigned char *alloc_buffer(size_t buf_size, int memflush)
{
	void *buf = NULL;
	uint64_t *p64;
	size_t s64;
	int ret;

	/* posix_memalign() returns a positive error number on failure, not -1 */
	ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
	if (ret)
		return NULL;

	/* Initialize the buffer: write one random value into each cache line */
	p64 = buf;
	s64 = buf_size / sizeof(uint64_t);

	while (s64 > 0) {
		*p64 = (uint64_t)rand();
		p64 += (CL_SIZE / sizeof(uint64_t));
		s64 -= (CL_SIZE / sizeof(uint64_t));
	}

	/* Flush the memory before using it to avoid "cache hot pages" effect */
	if (memflush)
		mem_flush(buf, buf_size);

	return buf;
}

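/*
 * run_fill_buf - allocate a buffer of @buf_size bytes and fill it with reads
 * (@op == 0) or writes (any other @op), either once or in an endless loop
 * depending on @once.
 *
 * Return: 0 on success, -1 if the buffer could not be allocated.
 */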
int run_fill_buf(size_t buf_size, int memflush, int op, bool once)
{
	unsigned char *buf;

	buf = alloc_buffer(buf_size, memflush);
	if (!buf)
		return -1;

	if (op == 0)
		fill_cache_read(buf, buf_size, once);
	else
		fill_cache_write(buf, buf_size, once);
	free(buf);

	return 0;
}