//===-- dfsan_interface.h -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of DataFlowSanitizer.
//
// Public interface header.
//===----------------------------------------------------------------------===//
#ifndef DFSAN_INTERFACE_H
#define DFSAN_INTERFACE_H

#include <stddef.h>
#include <stdint.h>
#include <sanitizer/common_interface_defs.h>

#ifdef __cplusplus
extern "C" {
#endif

/// A taint label. Labels are combined with dfsan_union().
typedef uint8_t dfsan_label;
/// An origin id, as returned by dfsan_get_origin() and related functions.
typedef uint32_t dfsan_origin;

/// Signature of the callback argument to dfsan_set_write_callback().
typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count);

/// Signature of the callback argument to dfsan_set_conditional_callback().
typedef void (*dfsan_conditional_callback_t)(dfsan_label label,
                                             dfsan_origin origin);

/// Signature of the callback argument to dfsan_set_reaches_function_callback().
/// The callback receives the label and origin of the tainted data together
/// with \c file, \c line and \c function (presumably the source location
/// where the tainted data was observed -- confirm against the runtime).
typedef void (*dfsan_reaches_function_callback_t)(dfsan_label label,
                                                  dfsan_origin origin,
                                                  const char *file,
                                                  unsigned int line,
                                                  const char *function);

/// Computes the union of \c l1 and \c l2, resulting in a union label.
dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);

/// Sets the label for each address in [addr,addr+size) to \c label.
void dfsan_set_label(dfsan_label label, void *addr, size_t size);

/// Sets the label for each address in [addr,addr+size) to the union of the
/// current label for that address and \c label.
void dfsan_add_label(dfsan_label label, void *addr, size_t size);

/// Retrieves the label associated with the given data.
///
/// The type of 'data' is arbitrary.  The function accepts a value of any type,
/// which can be truncated or extended (implicitly or explicitly) as necessary.
/// The truncation/extension operations will preserve the label of the original
/// value.
dfsan_label dfsan_get_label(long data);

/// Retrieves the immediate origin associated with the given data. The returned
/// origin may point to another origin.
///
/// The type of 'data' is arbitrary.
dfsan_origin dfsan_get_origin(long data);

/// Retrieves the label associated with the data at the given address.
dfsan_label dfsan_read_label(const void *addr, size_t size);

/// Returns the origin associated with the first tainted byte in the \c size
/// bytes starting at address \c addr.
dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, size_t size);

/// Returns whether the given label \c label contains the label \c elem.
int dfsan_has_label(dfsan_label label, dfsan_label elem);

/// Flushes the DFSan shadow, i.e. forgets about all labels currently associated
/// with the application memory.  Use this call to start over the taint tracking
/// within the same process.
///
/// Note: If another thread is working with tainted data during the flush, that
/// taint could still be written to shadow after the flush.
void dfsan_flush(void);

/// Sets a callback to be invoked on calls to write().  The callback is invoked
/// before the write is done.  The write is not guaranteed to succeed when the
/// callback executes.  Pass in NULL to remove any callback.
void dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback);

/// Sets a callback to be invoked on any conditional expressions which have a
/// taint label set. This can be used to find where tainted data influences
/// the behavior of the program.
/// These callbacks will only be added when -dfsan-conditional-callbacks=true.
void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback);

/// Conditional expressions occur during signal handlers.
/// Making callbacks that handle signals well is tricky, so when
/// -dfsan-conditional-callbacks=true, conditional expressions used in signal
/// handlers will add the labels they see into a global (bitwise-or together).
/// This function returns all label bits seen in signal handler conditions.
// Declared with (void) so that C compilers see a real prototype; an empty
// parameter list would leave the arguments unspecified before C23.
dfsan_label dfsan_get_labels_in_signal_conditional(void);

/// Sets a callback to be invoked when tainted data reaches a function.
/// This could occur at function entry, or at a load instruction.
/// These callbacks will only be added if -dfsan-reaches-function-callbacks=1.
void dfsan_set_reaches_function_callback(
    dfsan_reaches_function_callback_t callback);

/// Making callbacks that handle signals well is tricky, so when
/// -dfsan-reaches-function-callbacks=true, functions reached in signal
/// handlers will add the labels they see into a global (bitwise-or together).
/// This function returns all label bits seen during signal handlers.
// Declared with (void) so that C compilers see a real prototype; an empty
// parameter list would leave the arguments unspecified before C23.
dfsan_label dfsan_get_labels_in_signal_reaches_function(void);

/// Interceptor hooks.
/// Whenever a dfsan's custom function is called the corresponding
/// hook is called if non-zero. The hooks should be defined by the user.
/// The primary use case is taint-guided fuzzing, where the fuzzer
/// needs to see the parameters of the function and the labels.
/// FIXME: implement more hooks.
void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
                            size_t n, dfsan_label s1_label,
                            dfsan_label s2_label, dfsan_label n_label);
void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
                             size_t n, dfsan_label s1_label,
                             dfsan_label s2_label, dfsan_label n_label);

/// Prints the origin trace of the label at the address addr to stderr. It also
/// prints description at the beginning of the trace. If origin tracking is not
/// on, or the address is not labeled, it prints nothing.
void dfsan_print_origin_trace(const void *addr, const char *description);
/// As above, but use an origin id from dfsan_get_origin() instead of address.
/// Does not include header line with taint label and address information.
void dfsan_print_origin_id_trace(dfsan_origin origin);

/// Prints the origin trace of the label at the address \p addr to a
/// pre-allocated output buffer. If origin tracking is not on, or the address is
/// not labeled, it prints nothing.
///
/// Typical usage:
/// \code
///   char kDescription[] = "...";
///   char buf[1024];
///   dfsan_sprint_origin_trace(&tainted_var, kDescription, buf, sizeof(buf));
/// \endcode
///
/// Typical usage that handles truncation:
/// \code
///   char buf[1024];
///   size_t len = dfsan_sprint_origin_trace(&var, nullptr, buf, sizeof(buf));
///
///   if (len < sizeof(buf)) {
///     ProcessOriginTrace(buf);
///   } else {
///     char *tmpbuf = new char[len + 1];
///     dfsan_sprint_origin_trace(&var, nullptr, tmpbuf, len + 1);
///     ProcessOriginTrace(tmpbuf);
///     delete[] tmpbuf;
///   }
/// \endcode
///
/// \param addr  The tainted memory address whose origin we are printing.
/// \param description  A description printed at the beginning of the trace.
/// \param [out] out_buf  The output buffer to write the results to.
/// \param out_buf_size  The size of \p out_buf.
///
/// \returns The number of symbols that should have been written to \p out_buf
/// (not including trailing null byte '\0'). Thus, the string is truncated iff
/// return value is not less than \p out_buf_size.
size_t dfsan_sprint_origin_trace(const void *addr, const char *description,
                                 char *out_buf, size_t out_buf_size);
/// As above, but use an origin id from dfsan_get_origin() instead of address.
/// Does not include header line with taint label and address information.
size_t dfsan_sprint_origin_id_trace(dfsan_origin origin, char *out_buf,
                                    size_t out_buf_size);

/// Prints the stack trace leading to this call to a pre-allocated output
/// buffer.
///
/// For usage examples, see dfsan_sprint_origin_trace.
///
/// \param [out] out_buf  The output buffer to write the results to.
/// \param out_buf_size  The size of \p out_buf.
///
/// \returns The number of symbols that should have been written to \p out_buf
/// (not including trailing null byte '\0'). Thus, the string is truncated iff
/// return value is not less than \p out_buf_size.
size_t dfsan_sprint_stack_trace(char *out_buf, size_t out_buf_size);

/// Retrieves the very first origin associated with the data at the given
/// address.
dfsan_origin dfsan_get_init_origin(const void *addr);

/// Returns the value of -dfsan-track-origins.
/// * 0: do not track origins.
/// * 1: track origins at memory store operations.
/// * 2: track origins at memory load and store operations.
int dfsan_get_track_origins(void);
#ifdef __cplusplus
} // extern "C"

/// C++ convenience overload: applies \c label to the \c sizeof(T) bytes
/// occupied by the object \c data by forwarding to the address/size form of
/// dfsan_set_label().
template <typename T> void dfsan_set_label(dfsan_label label, T &data) {
  dfsan_set_label(label, (void *)&data, sizeof(T));
}

#endif

#endif // DFSAN_INTERFACE_H