1libperf-sampling(7)
2===================
3
4NAME
5----
6libperf-sampling - sampling interface
7
8
9DESCRIPTION
10-----------
11The sampling interface provides an API to sample specific perf events and read the recorded samples.
12
13The following text tries to explain sampling using the `sampling.c` example.
14
15It is by no means a complete guide to sampling, but it shows the basic libperf API for sampling.
16
17The `sampling.c` example comes with the libperf package and can be compiled and run like this:
18
19[source,bash]
20--
21$ gcc -o sampling sampling.c -lperf
22$ sudo ./sampling
23cpu   0, pid      0, tid      0, ip     ffffffffad06c4e6, period                    1
24cpu   0, pid   4465, tid   4469, ip     ffffffffad118748, period             18322959
25cpu   0, pid      0, tid      0, ip     ffffffffad115722, period             33544846
26cpu   0, pid   4465, tid   4470, ip         7f84fe0cdad6, period             23687474
27cpu   0, pid      0, tid      0, ip     ffffffffad9e0349, period             34255790
28cpu   0, pid   4465, tid   4469, ip     ffffffffad136581, period             38664069
29cpu   0, pid      0, tid      0, ip     ffffffffad9e55e2, period             21922384
30cpu   0, pid   4465, tid   4470, ip         7f84fe0ebebf, period             17655175
31...
32--
33
34It requires root access, because it uses the hardware cycles event.
35
36The `sampling.c` example profiles/samples all CPUs with hardware cycles. In a
37nutshell, it:
38
39- creates events
40- adds them to the event list
41- opens and enables events through the event list
42- sleeps for 3 seconds
43- disables events
44- reads and displays recorded samples
45- destroys the event list
46
47The first thing you need to do before using libperf is to call the init function:
48
49[source,c]
50--
51 12 static int libperf_print(enum libperf_print_level level,
52 13                          const char *fmt, va_list ap)
53 14 {
54 15         return vfprintf(stderr, fmt, ap);
55 16 }
56
57 23 int main(int argc, char **argv)
58 24 {
59 ...
60 40         libperf_init(libperf_print);
61--
62
63It sets up the library and sets the function used for debug output from the library.
64
65The `libperf_print` callback will receive any message with its debug level,
66defined as:
67
68[source,c]
69--
70enum libperf_print_level {
71        LIBPERF_ERR,
72        LIBPERF_WARN,
73        LIBPERF_INFO,
74        LIBPERF_DEBUG,
75        LIBPERF_DEBUG2,
76        LIBPERF_DEBUG3,
77};
78--
79
80Once the setup is complete we start by defining the cycles event using the `struct perf_event_attr`:
81
82[source,c]
83--
84 29         struct perf_event_attr attr = {
85 30                 .type        = PERF_TYPE_HARDWARE,
86 31                 .config      = PERF_COUNT_HW_CPU_CYCLES,
87 32                 .disabled    = 1,
88 33                 .freq        = 1,
89 34                 .sample_freq = 10,
90 35                 .sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD,
91 36         };
92--
93
94The next step is to prepare the CPUs map.
95
96In this case we will monitor all the available CPUs:
97
98[source,c]
99--
100 42         cpus = perf_cpu_map__new_online_cpus();
101 43         if (!cpus) {
102 44                 fprintf(stderr, "failed to create cpus\n");
103 45                 return -1;
104 46         }
105--
106
107Now we create libperf's event list, which will serve as a holder for the cycles event:
108
109[source,c]
110--
111 48         evlist = perf_evlist__new();
112 49         if (!evlist) {
113 50                 fprintf(stderr, "failed to create evlist\n");
114 51                 goto out_cpus;
115 52         }
116--
117
118We create libperf's event for the cycles attribute we defined earlier and add it to the list:
119
120[source,c]
121--
122 54         evsel = perf_evsel__new(&attr);
123 55         if (!evsel) {
124 56                 fprintf(stderr, "failed to create cycles\n");
125 57                 goto out_cpus;
126 58         }
127 59
128 60         perf_evlist__add(evlist, evsel);
129--
130
131Configure the event list with the cpus map and open the event:
132
133[source,c]
134--
135 62         perf_evlist__set_maps(evlist, cpus, NULL);
136 63
137 64         err = perf_evlist__open(evlist);
138 65         if (err) {
139 66                 fprintf(stderr, "failed to open evlist\n");
140 67                 goto out_evlist;
141 68         }
142--
143
144Once the events list is open, we can create memory maps AKA perf ring buffers:
145
146[source,c]
147--
148 70         err = perf_evlist__mmap(evlist, 4);
149 71         if (err) {
150 72                 fprintf(stderr, "failed to mmap evlist\n");
151 73                 goto out_evlist;
152 74         }
153--
154
155The event is created as disabled (note the `disabled = 1` assignment above),
156so we need to enable the events list explicitly.
157
158From this moment the cycles event is sampling.
159
160We will sleep for 3 seconds while the ring buffers get data from all CPUs, then we disable the events list.
161
162[source,c]
163--
164 76         perf_evlist__enable(evlist);
165 77         sleep(3);
166 78         perf_evlist__disable(evlist);
167--
168
169The following code walks through the ring buffers and reads stored events/samples:
170
171[source,c]
172--
173 80         perf_evlist__for_each_mmap(evlist, map, false) {
174 81                 if (perf_mmap__read_init(map) < 0)
175 82                         continue;
176 83
177 84                 while ((event = perf_mmap__read_event(map)) != NULL) {
178
179                            /* process event */
180
181108                         perf_mmap__consume(map);
182109                 }
183110                 perf_mmap__read_done(map);
184111         }
185
186--
187
188Each sample needs to be parsed:
189
190[source,c]
191--
192 85                         int cpu, pid, tid;
193 86                         __u64 ip, period, *array;
194 87                         union u64_swap u;
195 88
196 89                         array = event->sample.array;
197 90
198 91                         ip = *array;
199 92                         array++;
200 93
201 94                         u.val64 = *array;
202 95                         pid = u.val32[0];
203 96                         tid = u.val32[1];
204 97                         array++;
205 98
206 99                         u.val64 = *array;
207100                         cpu = u.val32[0];
208101                         array++;
209102
210103                         period = *array;
211104
212105                         fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n",
213106                                 cpu, pid, tid, ip, period);
214--
215
216And finally cleanup.
217
218We close the whole events list (including the cycles event) and remove it together with the cpus map:
219
220[source,c]
221--
222113 out_evlist:
223114         perf_evlist__delete(evlist);
224115 out_cpus:
225116         perf_cpu_map__put(cpus);
226117         return err;
227118 }
228--
229
230REPORTING BUGS
231--------------
232Report bugs to <linux-perf-users@vger.kernel.org>.
233
234LICENSE
235-------
236libperf is Free Software licensed under the GNU LGPL 2.1
237
238RESOURCES
239---------
240https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
241
242SEE ALSO
243--------
244libperf(3), libperf-counting(7)
245