1/*
2 * strlen benchmark.
3 *
4 * Copyright (c) 2020-2021, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8#define _GNU_SOURCE
9#include <stdint.h>
10#include <stdio.h>
11#include <string.h>
12#include <assert.h>
13#include "stringlib.h"
14#include "benchlib.h"
15
/* Geometry of the test buffer: MAX_ALIGN slots, each holding up to
   MAX_STRLEN characters plus a terminating NUL.  */
#define MAX_STRLEN 256
#define MAX_ALIGN 32

/* Number of entries in the random test set.  */
#define NUM_TESTS 16384

/* Repeat counts: ITERS passes over the random test set, ITERS2 calls per
   size in the small-string runs, ITERS3 calls per size in the medium-string
   runs.  */
#define ITERS 5000
#define ITERS2 20000000
#define ITERS3 2000000

/* Buffer shared by every benchmark, aligned to 4096 bytes.  */
static char a[MAX_ALIGN * (MAX_STRLEN + 1)] __attribute__ ((aligned (4096)));
25
/* One benchmarked implementation: the name printed in the results, the
   entry point to call, and a test_mte flag that this file stores but never
   reads.  */
struct fun
{
  const char *name;
  size_t (*fun) (const char *s);
  int test_mte;
};

/* Build a table entry whose name is the spelling of the function.  */
#define F(x, mte) { .name = #x, .fun = x, .test_mte = mte },

/* Implementations to benchmark, selected by target architecture and
   terminated by an all-zero entry.  */
static const struct fun funtab[] = {
  // clang-format off
  F(strlen, 0)
#if __aarch64__
  F(__strlen_aarch64, 0)
  F(__strlen_aarch64_mte, 1)
# if __ARM_FEATURE_SVE
  F(__strlen_aarch64_sve, 1)
# endif
#elif __arm__
# if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
  F(__strlen_armv6t2, 0)
# endif
#endif
  { .name = 0, .fun = 0, .test_mte = 0 }
  // clang-format on
};
#undef F
51
/* Offsets into the buffer `a` at which each random test string starts;
   filled in by init_strlen_tests and read by the random benchmark.  */
static uint16_t strlen_tests[NUM_TESTS];
53
/* A string length and its weight in the length distribution.  */
typedef struct
{
  uint16_t size;
  uint16_t freq;
} freq_data_t;

/* An alignment and its weight in the alignment distribution.  */
typedef struct
{
  uint8_t align;
  uint16_t freq;
} align_data_t;

/* Flat lookup table of string lengths.  SIZE_NUM is a power of two so a
   random value can be reduced to an index with SIZE_MASK.  */
#define SIZE_NUM 65536
#define SIZE_MASK (SIZE_NUM - 1)
static uint8_t strlen_len_arr[SIZE_NUM];

/* Frequency data for strlen sizes up to 128, based on SPEC2017.  Entries
   are { length, frequency }; the frequencies add up to SIZE_NUM and the
   first entry with a zero frequency ends the table.  */
static freq_data_t strlen_len_freq[] =
{
  {  12, 22671 }, {  18, 12834 }, {  13,  9555 }, {   6,  6348 },
  {  17,  6095 }, {  11,  2115 }, {  10,  1335 }, {   7,   814 },
  {   2,   646 }, {   9,   483 }, {   8,   471 }, {  16,   418 },
  {   4,   390 }, {   1,   388 }, {   5,   233 }, {   3,   204 },
  {   0,    79 }, {  14,    79 }, {  15,    69 }, {  26,    36 },
  {  22,    35 }, {  31,    24 }, {  32,    24 }, {  19,    21 },
  {  25,    17 }, {  28,    15 }, {  21,    14 }, {  33,    14 },
  {  20,    13 }, {  24,     9 }, {  29,     9 }, {  30,     9 },
  {  23,     7 }, {  34,     7 }, {  27,     6 }, {  44,     5 },
  {  42,     4 }, {  45,     3 }, {  47,     3 }, {  40,     2 },
  {  41,     2 }, {  43,     2 }, {  58,     2 }, {  78,     2 },
  {  36,     2 }, {  48,     1 }, {  52,     1 }, {  60,     1 },
  {  64,     1 }, {  56,     1 }, {  76,     1 }, {  68,     1 },
  {  80,     1 }, {  84,     1 }, {  72,     1 }, {  86,     1 },
  {  35,     1 }, {  39,     1 }, {  50,     1 }, {  38,     1 },
  {  37,     1 }, {  46,     1 }, {  98,     1 }, { 102,     1 },
  { 128,     1 }, {  51,     1 }, { 107,     1 },
  {   0,     0 }
};
77
78#define ALIGN_NUM 1024
79#define ALIGN_MASK (ALIGN_NUM - 1)
80static uint8_t strlen_align_arr[ALIGN_NUM];
81
82/* Alignment data for strlen based on SPEC2017.  */
83static align_data_t string_align_freq[] =
84{
85  {8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0}
86};
87
88static void
89init_strlen_distribution (void)
90{
91  int i, j, freq, size, n;
92
93  for (n = i = 0; (freq = strlen_len_freq[i].freq) != 0; i++)
94    for (j = 0, size = strlen_len_freq[i].size; j < freq; j++)
95      strlen_len_arr[n++] = size;
96  assert (n == SIZE_NUM);
97
98  for (n = i = 0; (freq = string_align_freq[i].freq) != 0; i++)
99    for (j = 0, size = string_align_freq[i].align; j < freq; j++)
100      strlen_align_arr[n++] = size;
101  assert (n == ALIGN_NUM);
102}
103
104static void
105init_strlen_tests (void)
106{
107  uint16_t index[MAX_ALIGN];
108
109  memset (a, 'x', sizeof (a));
110
111  /* Create indices for strings at all alignments.  */
112  for (int i = 0; i < MAX_ALIGN; i++)
113    {
114      index[i] = i * (MAX_STRLEN + 1);
115      a[index[i] + MAX_STRLEN] = 0;
116    }
117
118  /* Create a random set of strlen input strings using the string length
119     and alignment distributions.  */
120  for (int n = 0; n < NUM_TESTS; n++)
121    {
122      int align = strlen_align_arr[rand32 (0) & ALIGN_MASK];
123      int exp_len = strlen_len_arr[rand32 (0) & SIZE_MASK];
124
125      strlen_tests[n] =
126	index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len;
127    }
128}
129
130static volatile size_t maskv = 0;
131
132int main (void)
133{
134  rand32 (0x12345678);
135  init_strlen_distribution ();
136  init_strlen_tests ();
137
138  printf ("\nRandom strlen (bytes/ns):\n");
139  for (int f = 0; funtab[f].name != 0; f++)
140    {
141      size_t res = 0, strlen_size = 0, mask = maskv;
142      printf ("%22s ", funtab[f].name);
143
144      for (int c = 0; c < NUM_TESTS; c++)
145	strlen_size += funtab[f].fun (a + strlen_tests[c]);
146      strlen_size *= ITERS;
147
148      /* Measure latency of strlen result with (res & mask).  */
149      uint64_t t = clock_get_ns ();
150      for (int i = 0; i < ITERS; i++)
151	for (int c = 0; c < NUM_TESTS; c++)
152	  res = funtab[f].fun (a + strlen_tests[c] + (res & mask));
153      t = clock_get_ns () - t;
154      printf ("%.2f\n", (double)strlen_size / t);
155    }
156
157  printf ("\nSmall aligned strlen (bytes/ns):\n");
158  for (int f = 0; funtab[f].name != 0; f++)
159    {
160      printf ("%22s ", funtab[f].name);
161
162      for (int size = 1; size <= 64; size *= 2)
163	{
164	  memset (a, 'x', size);
165	  a[size - 1] = 0;
166
167	  uint64_t t = clock_get_ns ();
168	  for (int i = 0; i < ITERS2; i++)
169	    funtab[f].fun (a);
170	  t = clock_get_ns () - t;
171	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
172		  size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
173	}
174      printf ("\n");
175    }
176
177  printf ("\nSmall unaligned strlen (bytes/ns):\n");
178  for (int f = 0; funtab[f].name != 0; f++)
179    {
180      printf ("%22s ", funtab[f].name);
181
182      int align = 9;
183      for (int size = 1; size <= 64; size *= 2)
184	{
185	  memset (a + align, 'x', size);
186	  a[align + size - 1] = 0;
187
188	  uint64_t t = clock_get_ns ();
189	  for (int i = 0; i < ITERS2; i++)
190	    funtab[f].fun (a + align);
191	  t = clock_get_ns () - t;
192	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
193		  size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
194	}
195      printf ("\n");
196    }
197
198  printf ("\nMedium strlen (bytes/ns):\n");
199  for (int f = 0; funtab[f].name != 0; f++)
200    {
201      printf ("%22s ", funtab[f].name);
202
203      for (int size = 128; size <= 4096; size *= 2)
204	{
205	  memset (a, 'x', size);
206	  a[size - 1] = 0;
207
208	  uint64_t t = clock_get_ns ();
209	  for (int i = 0; i < ITERS3; i++)
210	    funtab[f].fun (a);
211	  t = clock_get_ns () - t;
212	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
213		  size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t);
214	}
215      printf ("\n");
216    }
217
218  printf ("\n");
219
220  return 0;
221}
222