1//===-- dfsan_interface.h -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of DataFlowSanitizer.
10//
11// Public interface header.
12//===----------------------------------------------------------------------===//
13#ifndef DFSAN_INTERFACE_H
14#define DFSAN_INTERFACE_H
15
16#include <stddef.h>
17#include <stdint.h>
18#include <sanitizer/common_interface_defs.h>
19
20#ifdef __cplusplus
21extern "C" {
22#endif
23
24typedef uint8_t dfsan_label;
25typedef uint32_t dfsan_origin;
26
27/// Signature of the callback argument to dfsan_set_write_callback().
28typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count);
29
30/// Signature of the callback argument to dfsan_set_conditional_callback().
31typedef void (*dfsan_conditional_callback_t)(dfsan_label label,
32                                             dfsan_origin origin);
33
34/// Signature of the callback argument to dfsan_set_reaches_function_callback().
35/// The description is intended to hold the name of the variable.
36typedef void (*dfsan_reaches_function_callback_t)(dfsan_label label,
37                                                  dfsan_origin origin,
38                                                  const char *file,
39                                                  unsigned int line,
40                                                  const char *function);
41
42/// Computes the union of \c l1 and \c l2, resulting in a union label.
43dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);
44
45/// Sets the label for each address in [addr,addr+size) to \c label.
46void dfsan_set_label(dfsan_label label, void *addr, size_t size);
47
48/// Sets the label for each address in [addr,addr+size) to the union of the
49/// current label for that address and \c label.
50void dfsan_add_label(dfsan_label label, void *addr, size_t size);
51
52/// Retrieves the label associated with the given data.
53///
54/// The type of 'data' is arbitrary.  The function accepts a value of any type,
55/// which can be truncated or extended (implicitly or explicitly) as necessary.
56/// The truncation/extension operations will preserve the label of the original
57/// value.
58dfsan_label dfsan_get_label(long data);
59
60/// Retrieves the immediate origin associated with the given data. The returned
61/// origin may point to another origin.
62///
63/// The type of 'data' is arbitrary.
64dfsan_origin dfsan_get_origin(long data);
65
66/// Retrieves the label associated with the data at the given address.
67dfsan_label dfsan_read_label(const void *addr, size_t size);
68
/// Returns the origin associated with the first tainted byte among the
/// \p size bytes starting at address \p addr.
71dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, size_t size);
72
/// Returns whether the given label \p label contains the label \p elem.
74int dfsan_has_label(dfsan_label label, dfsan_label elem);
75
76/// Flushes the DFSan shadow, i.e. forgets about all labels currently associated
77/// with the application memory.  Use this call to start over the taint tracking
78/// within the same process.
79///
80/// Note: If another thread is working with tainted data during the flush, that
81/// taint could still be written to shadow after the flush.
82void dfsan_flush(void);
83
84/// Sets a callback to be invoked on calls to write().  The callback is invoked
85/// before the write is done.  The write is not guaranteed to succeed when the
86/// callback executes.  Pass in NULL to remove any callback.
87void dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback);
88
89/// Sets a callback to be invoked on any conditional expressions which have a
90/// taint label set. This can be used to find where tainted data influences
91/// the behavior of the program.
92/// These callbacks will only be added when -dfsan-conditional-callbacks=true.
93void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback);
94
95/// Conditional expressions occur during signal handlers.
96/// Making callbacks that handle signals well is tricky, so when
97/// -dfsan-conditional-callbacks=true, conditional expressions used in signal
98/// handlers will add the labels they see into a global (bitwise-or together).
99/// This function returns all label bits seen in signal handler conditions.
100dfsan_label dfsan_get_labels_in_signal_conditional();
101
102/// Sets a callback to be invoked when tainted data reaches a function.
103/// This could occur at function entry, or at a load instruction.
104/// These callbacks will only be added if -dfsan-reaches-function-callbacks=1.
105void dfsan_set_reaches_function_callback(
106    dfsan_reaches_function_callback_t callback);
107
108/// Making callbacks that handle signals well is tricky, so when
109/// -dfsan-reaches-function-callbacks=true, functions reached in signal
110/// handlers will add the labels they see into a global (bitwise-or together).
111/// This function returns all label bits seen during signal handlers.
112dfsan_label dfsan_get_labels_in_signal_reaches_function();
113
/// Interceptor hooks.
/// Whenever one of dfsan's custom functions is called, the corresponding
/// hook is called if it is non-zero. The hooks should be defined by the user.
/// The primary use case is taint-guided fuzzing, where the fuzzer
/// needs to see the parameters of the function and the labels.
/// FIXME: implement more hooks.
120void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
121                            size_t n, dfsan_label s1_label,
122                            dfsan_label s2_label, dfsan_label n_label);
123void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
124                             size_t n, dfsan_label s1_label,
125                             dfsan_label s2_label, dfsan_label n_label);
126
127/// Prints the origin trace of the label at the address addr to stderr. It also
128/// prints description at the beginning of the trace. If origin tracking is not
129/// on, or the address is not labeled, it prints nothing.
130void dfsan_print_origin_trace(const void *addr, const char *description);
131/// As above, but use an origin id from dfsan_get_origin() instead of address.
132/// Does not include header line with taint label and address information.
133void dfsan_print_origin_id_trace(dfsan_origin origin);
134
135/// Prints the origin trace of the label at the address \p addr to a
136/// pre-allocated output buffer. If origin tracking is not on, or the address is
137/// not labeled, it prints nothing.
138///
139/// Typical usage:
140/// \code
141///   char kDescription[] = "...";
142///   char buf[1024];
143///   dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf));
144/// \endcode
145///
146/// Typical usage that handles truncation:
147/// \code
148///   char buf[1024];
///   size_t len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf));
150///
151///   if (len < sizeof(buf)) {
152///     ProcessOriginTrace(buf);
153///   } else {
154///     char *tmpbuf = new char[len + 1];
155///     dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1);
156///     ProcessOriginTrace(tmpbuf);
157///     delete[] tmpbuf;
158///   }
159/// \endcode
160///
161/// \param addr The tainted memory address whose origin we are printing.
162/// \param description A description printed at the beginning of the trace.
163/// \param [out] out_buf The output buffer to write the results to.
164/// \param out_buf_size The size of \p out_buf.
165///
166/// \returns The number of symbols that should have been written to \p out_buf
167/// (not including trailing null byte '\0'). Thus, the string is truncated iff
168/// return value is not less than \p out_buf_size.
169size_t dfsan_sprint_origin_trace(const void *addr, const char *description,
170                                 char *out_buf, size_t out_buf_size);
171/// As above, but use an origin id from dfsan_get_origin() instead of address.
172/// Does not include header line with taint label and address information.
173size_t dfsan_sprint_origin_id_trace(dfsan_origin origin, char *out_buf,
174                                    size_t out_buf_size);
175
176/// Prints the stack trace leading to this call to a pre-allocated output
177/// buffer.
178///
179/// For usage examples, see dfsan_sprint_origin_trace.
180///
181/// \param [out] out_buf The output buffer to write the results to.
182/// \param out_buf_size The size of \p out_buf.
183///
184/// \returns The number of symbols that should have been written to \p out_buf
185/// (not including trailing null byte '\0'). Thus, the string is truncated iff
186/// return value is not less than \p out_buf_size.
187size_t dfsan_sprint_stack_trace(char *out_buf, size_t out_buf_size);
188
189/// Retrieves the very first origin associated with the data at the given
190/// address.
191dfsan_origin dfsan_get_init_origin(const void *addr);
192
193/// Returns the value of -dfsan-track-origins.
194/// * 0: do not track origins.
195/// * 1: track origins at memory store operations.
196/// * 2: track origins at memory load and store operations.
197int dfsan_get_track_origins(void);
198#ifdef __cplusplus
199}  // extern "C"
200
201template <typename T> void dfsan_set_label(dfsan_label label, T &data) {
202  dfsan_set_label(label, (void *)&data, sizeof(T));
203}
204
205#endif
206
207#endif  // DFSAN_INTERFACE_H
208