1/* An experimental state machine, for tracking "taint": unsanitized uses 2 of data potentially under an attacker's control. 3 4 Copyright (C) 2019-2020 Free Software Foundation, Inc. 5 Contributed by David Malcolm <dmalcolm@redhat.com>. 6 7This file is part of GCC. 8 9GCC is free software; you can redistribute it and/or modify it 10under the terms of the GNU General Public License as published by 11the Free Software Foundation; either version 3, or (at your option) 12any later version. 13 14GCC is distributed in the hope that it will be useful, but 15WITHOUT ANY WARRANTY; without even the implied warranty of 16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17General Public License for more details. 18 19You should have received a copy of the GNU General Public License 20along with GCC; see the file COPYING3. If not see 21<http://www.gnu.org/licenses/>. */ 22 23#include "config.h" 24#include "system.h" 25#include "coretypes.h" 26#include "tree.h" 27#include "function.h" 28#include "basic-block.h" 29#include "gimple.h" 30#include "options.h" 31#include "diagnostic-path.h" 32#include "diagnostic-metadata.h" 33#include "function.h" 34#include "analyzer/analyzer.h" 35#include "diagnostic-event-id.h" 36#include "analyzer/analyzer-logging.h" 37#include "analyzer/sm.h" 38#include "analyzer/pending-diagnostic.h" 39 40#if ENABLE_ANALYZER 41 42namespace ana { 43 44namespace { 45 46/* An experimental state machine, for tracking "taint": unsanitized uses 47 of data potentially under an attacker's control. */ 48 49class taint_state_machine : public state_machine 50{ 51public: 52 taint_state_machine (logger *logger); 53 54 bool inherited_state_p () const FINAL OVERRIDE { return true; } 55 56 bool on_stmt (sm_context *sm_ctxt, 57 const supernode *node, 58 const gimple *stmt) const FINAL OVERRIDE; 59 60 void on_condition (sm_context *sm_ctxt, 61 const supernode *node, 62 const gimple *stmt, 63 tree lhs, 64 enum tree_code op, 65 tree rhs) const FINAL OVERRIDE; 66 67 bool can_purge_p (state_t s) const FINAL OVERRIDE; 68 69 /* Start state. */ 70 state_t m_start; 71 72 /* State for a "tainted" value: unsanitized data potentially under an 73 attacker's control. */ 74 state_t m_tainted; 75 76 /* State for a "tainted" value that has a lower bound. */ 77 state_t m_has_lb; 78 79 /* State for a "tainted" value that has an upper bound. */ 80 state_t m_has_ub; 81 82 /* Stop state, for a value we don't want to track any more. */ 83 state_t m_stop; 84}; 85 86enum bounds 87{ 88 BOUNDS_NONE, 89 BOUNDS_UPPER, 90 BOUNDS_LOWER 91}; 92 93class tainted_array_index 94 : public pending_diagnostic_subclass<tainted_array_index> 95{ 96public: 97 tainted_array_index (const taint_state_machine &sm, tree arg, 98 enum bounds has_bounds) 99 : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds) {} 100 101 const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; } 102 103 bool operator== (const tainted_array_index &other) const 104 { 105 return same_tree_p (m_arg, other.m_arg); 106 } 107 108 bool emit (rich_location *rich_loc) FINAL OVERRIDE 109 { 110 diagnostic_metadata m; 111 m.add_cwe (129); 112 switch (m_has_bounds) 113 { 114 default: 115 gcc_unreachable (); 116 case BOUNDS_NONE: 117 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index, 118 "use of tainted value %qE in array lookup" 119 " without bounds checking", 120 m_arg); 121 break; 122 case BOUNDS_UPPER: 123 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index, 124 "use of tainted value %qE in array lookup" 125 " without lower-bounds checking", 126 m_arg); 127 break; 128 case BOUNDS_LOWER: 129 return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index, 130 "use of tainted value %qE in array lookup" 131 " without upper-bounds checking", 132 m_arg); 133 break; 134 } 135 } 136 137 label_text describe_state_change (const evdesc::state_change &change) 138 FINAL OVERRIDE 139 { 140 if (change.m_new_state == m_sm.m_tainted) 141 { 142 if (change.m_origin) 143 return change.formatted_print ("%qE has an unchecked value here" 144 " (from %qE)", 145 change.m_expr, change.m_origin); 146 else 147 return change.formatted_print ("%qE gets an unchecked value here", 148 change.m_expr); 149 } 150 else if (change.m_new_state == m_sm.m_has_lb) 151 return change.formatted_print ("%qE has its lower bound checked here", 152 change.m_expr); 153 else if (change.m_new_state == m_sm.m_has_ub) 154 return change.formatted_print ("%qE has its upper bound checked here", 155 change.m_expr); 156 return label_text (); 157 } 158 159 label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE 160 { 161 switch (m_has_bounds) 162 { 163 default: 164 gcc_unreachable (); 165 case BOUNDS_NONE: 166 return ev.formatted_print ("use of tainted value %qE in array lookup" 167 " without bounds checking", 168 m_arg); 169 case BOUNDS_UPPER: 170 return ev.formatted_print ("use of tainted value %qE in array lookup" 171 " without lower-bounds checking", 172 m_arg); 173 case BOUNDS_LOWER: 174 return ev.formatted_print ("use of tainted value %qE in array lookup" 175 " without upper-bounds checking", 176 m_arg); 177 } 178 } 179 180private: 181 const taint_state_machine &m_sm; 182 tree m_arg; 183 enum bounds m_has_bounds; 184}; 185 186/* taint_state_machine's ctor. */ 187 188taint_state_machine::taint_state_machine (logger *logger) 189: state_machine ("taint", logger) 190{ 191 m_start = add_state ("start"); 192 m_tainted = add_state ("tainted"); 193 m_has_lb = add_state ("has_lb"); 194 m_has_ub = add_state ("has_ub"); 195 m_stop = add_state ("stop"); 196} 197 198/* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */ 199 200bool 201taint_state_machine::on_stmt (sm_context *sm_ctxt, 202 const supernode *node, 203 const gimple *stmt) const 204{ 205 if (const gcall *call = dyn_cast <const gcall *> (stmt)) 206 if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call)) 207 { 208 if (is_named_call_p (callee_fndecl, "fread", call, 4)) 209 { 210 tree arg = gimple_call_arg (call, 0); 211 arg = sm_ctxt->get_readable_tree (arg); 212 213 sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted); 214 215 /* Dereference an ADDR_EXPR. */ 216 // TODO: should the engine do this? 217 if (TREE_CODE (arg) == ADDR_EXPR) 218 sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0), 219 m_start, m_tainted); 220 return true; 221 } 222 } 223 // TODO: ...etc; many other sources of untrusted data 224 225 if (const gassign *assign = dyn_cast <const gassign *> (stmt)) 226 { 227 tree rhs1 = gimple_assign_rhs1 (assign); 228 enum tree_code op = gimple_assign_rhs_code (assign); 229 230 /* Check array accesses. */ 231 if (op == ARRAY_REF) 232 { 233 tree arg = TREE_OPERAND (rhs1, 1); 234 arg = sm_ctxt->get_readable_tree (arg); 235 236 /* Unsigned types have an implicit lower bound. */ 237 bool is_unsigned = false; 238 if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) 239 is_unsigned = TYPE_UNSIGNED (TREE_TYPE (arg)); 240 241 /* Complain about missing bounds. */ 242 sm_ctxt->warn_for_state 243 (node, stmt, arg, m_tainted, 244 new tainted_array_index (*this, arg, 245 is_unsigned 246 ? BOUNDS_LOWER : BOUNDS_NONE)); 247 sm_ctxt->on_transition (node, stmt, arg, m_tainted, m_stop); 248 249 /* Complain about missing upper bound. */ 250 sm_ctxt->warn_for_state (node, stmt, arg, m_has_lb, 251 new tainted_array_index (*this, arg, 252 BOUNDS_LOWER)); 253 sm_ctxt->on_transition (node, stmt, arg, m_has_lb, m_stop); 254 255 /* Complain about missing lower bound. */ 256 if (!is_unsigned) 257 { 258 sm_ctxt->warn_for_state (node, stmt, arg, m_has_ub, 259 new tainted_array_index (*this, arg, 260 BOUNDS_UPPER)); 261 sm_ctxt->on_transition (node, stmt, arg, m_has_ub, m_stop); 262 } 263 } 264 } 265 266 return false; 267} 268 269/* Implementation of state_machine::on_condition vfunc for taint_state_machine. 270 Potentially transition state 'tainted' to 'has_ub' or 'has_lb', 271 and states 'has_ub' and 'has_lb' to 'stop'. */ 272 273void 274taint_state_machine::on_condition (sm_context *sm_ctxt, 275 const supernode *node, 276 const gimple *stmt, 277 tree lhs, 278 enum tree_code op, 279 tree rhs ATTRIBUTE_UNUSED) const 280{ 281 if (stmt == NULL) 282 return; 283 284 // TODO: this doesn't use the RHS; should we make it symmetric? 285 286 // TODO 287 switch (op) 288 { 289 //case NE_EXPR: 290 //case EQ_EXPR: 291 case GE_EXPR: 292 case GT_EXPR: 293 { 294 sm_ctxt->on_transition (node, stmt, lhs, m_tainted, 295 m_has_lb); 296 sm_ctxt->on_transition (node, stmt, lhs, m_has_ub, 297 m_stop); 298 } 299 break; 300 case LE_EXPR: 301 case LT_EXPR: 302 { 303 sm_ctxt->on_transition (node, stmt, lhs, m_tainted, 304 m_has_ub); 305 sm_ctxt->on_transition (node, stmt, lhs, m_has_lb, 306 m_stop); 307 } 308 break; 309 default: 310 break; 311 } 312} 313 314bool 315taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const 316{ 317 return true; 318} 319 320} // anonymous namespace 321 322/* Internal interface to this file. */ 323 324state_machine * 325make_taint_state_machine (logger *logger) 326{ 327 return new taint_state_machine (logger); 328} 329 330} // namespace ana 331 332#endif /* #if ENABLE_ANALYZER */ 333