1/* An experimental state machine, for tracking "taint": unsanitized uses
2   of data potentially under an attacker's control.
3
4   Copyright (C) 2019-2020 Free Software Foundation, Inc.
5   Contributed by David Malcolm <dmalcolm@redhat.com>.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 3, or (at your option)
12any later version.
13
14GCC is distributed in the hope that it will be useful, but
15WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tree.h"
27#include "function.h"
28#include "basic-block.h"
29#include "gimple.h"
30#include "options.h"
31#include "diagnostic-path.h"
32#include "diagnostic-metadata.h"
33#include "function.h"
34#include "analyzer/analyzer.h"
35#include "diagnostic-event-id.h"
36#include "analyzer/analyzer-logging.h"
37#include "analyzer/sm.h"
38#include "analyzer/pending-diagnostic.h"
39
40#if ENABLE_ANALYZER
41
42namespace ana {
43
44namespace {
45
46/* An experimental state machine, for tracking "taint": unsanitized uses
47   of data potentially under an attacker's control.  */
48
49class taint_state_machine : public state_machine
50{
51public:
52  taint_state_machine (logger *logger);
53
54  bool inherited_state_p () const FINAL OVERRIDE { return true; }
55
56  bool on_stmt (sm_context *sm_ctxt,
57		const supernode *node,
58		const gimple *stmt) const FINAL OVERRIDE;
59
60  void on_condition (sm_context *sm_ctxt,
61		     const supernode *node,
62		     const gimple *stmt,
63		     tree lhs,
64		     enum tree_code op,
65		     tree rhs) const FINAL OVERRIDE;
66
67  bool can_purge_p (state_t s) const FINAL OVERRIDE;
68
69  /* Start state.  */
70  state_t m_start;
71
72  /* State for a "tainted" value: unsanitized data potentially under an
73     attacker's control.  */
74  state_t m_tainted;
75
76  /* State for a "tainted" value that has a lower bound.  */
77  state_t m_has_lb;
78
79  /* State for a "tainted" value that has an upper bound.  */
80  state_t m_has_ub;
81
82  /* Stop state, for a value we don't want to track any more.  */
83  state_t m_stop;
84};
85
86enum bounds
87{
88  BOUNDS_NONE,
89  BOUNDS_UPPER,
90  BOUNDS_LOWER
91};
92
93class tainted_array_index
94  : public pending_diagnostic_subclass<tainted_array_index>
95{
96public:
97  tainted_array_index (const taint_state_machine &sm, tree arg,
98		       enum bounds has_bounds)
99  : m_sm (sm), m_arg (arg), m_has_bounds (has_bounds) {}
100
101  const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; }
102
103  bool operator== (const tainted_array_index &other) const
104  {
105    return same_tree_p (m_arg, other.m_arg);
106  }
107
108  bool emit (rich_location *rich_loc) FINAL OVERRIDE
109  {
110    diagnostic_metadata m;
111    m.add_cwe (129);
112    switch (m_has_bounds)
113      {
114      default:
115	gcc_unreachable ();
116      case BOUNDS_NONE:
117	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
118			     "use of tainted value %qE in array lookup"
119			     " without bounds checking",
120			     m_arg);
121	break;
122      case BOUNDS_UPPER:
123	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
124			     "use of tainted value %qE in array lookup"
125			     " without lower-bounds checking",
126			     m_arg);
127	break;
128      case BOUNDS_LOWER:
129	return warning_meta (rich_loc, m, OPT_Wanalyzer_tainted_array_index,
130			     "use of tainted value %qE in array lookup"
131			     " without upper-bounds checking",
132			     m_arg);
133	break;
134      }
135  }
136
137  label_text describe_state_change (const evdesc::state_change &change)
138    FINAL OVERRIDE
139  {
140    if (change.m_new_state == m_sm.m_tainted)
141      {
142	if (change.m_origin)
143	  return change.formatted_print ("%qE has an unchecked value here"
144					 " (from %qE)",
145					 change.m_expr, change.m_origin);
146	else
147	  return change.formatted_print ("%qE gets an unchecked value here",
148					 change.m_expr);
149      }
150    else if (change.m_new_state == m_sm.m_has_lb)
151      return change.formatted_print ("%qE has its lower bound checked here",
152				     change.m_expr);
153    else if (change.m_new_state == m_sm.m_has_ub)
154      return change.formatted_print ("%qE has its upper bound checked here",
155				     change.m_expr);
156    return label_text ();
157  }
158
159  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
160  {
161    switch (m_has_bounds)
162      {
163      default:
164	gcc_unreachable ();
165      case BOUNDS_NONE:
166	return ev.formatted_print ("use of tainted value %qE in array lookup"
167				   " without bounds checking",
168				   m_arg);
169      case BOUNDS_UPPER:
170	return ev.formatted_print ("use of tainted value %qE in array lookup"
171				   " without lower-bounds checking",
172				   m_arg);
173      case BOUNDS_LOWER:
174	return ev.formatted_print ("use of tainted value %qE in array lookup"
175				   " without upper-bounds checking",
176				   m_arg);
177      }
178  }
179
180private:
181  const taint_state_machine &m_sm;
182  tree m_arg;
183  enum bounds m_has_bounds;
184};
185
186/* taint_state_machine's ctor.  */
187
188taint_state_machine::taint_state_machine (logger *logger)
189: state_machine ("taint", logger)
190{
191  m_start = add_state ("start");
192  m_tainted = add_state ("tainted");
193  m_has_lb = add_state ("has_lb");
194  m_has_ub = add_state ("has_ub");
195  m_stop = add_state ("stop");
196}
197
198/* Implementation of state_machine::on_stmt vfunc for taint_state_machine.  */
199
200bool
201taint_state_machine::on_stmt (sm_context *sm_ctxt,
202			       const supernode *node,
203			       const gimple *stmt) const
204{
205  if (const gcall *call = dyn_cast <const gcall *> (stmt))
206    if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
207      {
208	if (is_named_call_p (callee_fndecl, "fread", call, 4))
209	  {
210	    tree arg = gimple_call_arg (call, 0);
211	    arg = sm_ctxt->get_readable_tree (arg);
212
213	    sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);
214
215	    /* Dereference an ADDR_EXPR.  */
216	    // TODO: should the engine do this?
217	    if (TREE_CODE (arg) == ADDR_EXPR)
218	      sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
219				      m_start, m_tainted);
220	    return true;
221	  }
222      }
223  // TODO: ...etc; many other sources of untrusted data
224
225  if (const gassign *assign = dyn_cast <const gassign *> (stmt))
226    {
227      tree rhs1 = gimple_assign_rhs1 (assign);
228      enum tree_code op = gimple_assign_rhs_code (assign);
229
230      /* Check array accesses.  */
231      if (op == ARRAY_REF)
232	{
233	  tree arg = TREE_OPERAND (rhs1, 1);
234	  arg = sm_ctxt->get_readable_tree (arg);
235
236	  /* Unsigned types have an implicit lower bound.  */
237	  bool is_unsigned = false;
238	  if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
239	    is_unsigned = TYPE_UNSIGNED (TREE_TYPE (arg));
240
241	  /* Complain about missing bounds.  */
242	  sm_ctxt->warn_for_state
243	    (node, stmt, arg, m_tainted,
244	     new tainted_array_index (*this, arg,
245				      is_unsigned
246				      ? BOUNDS_LOWER : BOUNDS_NONE));
247	  sm_ctxt->on_transition (node, stmt, arg, m_tainted, m_stop);
248
249	  /* Complain about missing upper bound.  */
250	  sm_ctxt->warn_for_state  (node, stmt, arg, m_has_lb,
251				    new tainted_array_index (*this, arg,
252							     BOUNDS_LOWER));
253	  sm_ctxt->on_transition (node, stmt, arg, m_has_lb, m_stop);
254
255	  /* Complain about missing lower bound.  */
256	  if (!is_unsigned)
257	    {
258	      sm_ctxt->warn_for_state  (node, stmt, arg, m_has_ub,
259					new tainted_array_index (*this, arg,
260								 BOUNDS_UPPER));
261	      sm_ctxt->on_transition (node, stmt, arg, m_has_ub, m_stop);
262	    }
263	}
264    }
265
266  return false;
267}
268
269/* Implementation of state_machine::on_condition vfunc for taint_state_machine.
270   Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
271   and states 'has_ub' and 'has_lb' to 'stop'.  */
272
273void
274taint_state_machine::on_condition (sm_context *sm_ctxt,
275				   const supernode *node,
276				   const gimple *stmt,
277				   tree lhs,
278				   enum tree_code op,
279				   tree rhs ATTRIBUTE_UNUSED) const
280{
281  if (stmt == NULL)
282    return;
283
284  // TODO: this doesn't use the RHS; should we make it symmetric?
285
286  // TODO
287  switch (op)
288    {
289      //case NE_EXPR:
290      //case EQ_EXPR:
291    case GE_EXPR:
292    case GT_EXPR:
293      {
294	sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
295				m_has_lb);
296	sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
297				m_stop);
298      }
299      break;
300    case LE_EXPR:
301    case LT_EXPR:
302      {
303	sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
304				m_has_ub);
305	sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
306				m_stop);
307      }
308      break;
309    default:
310      break;
311    }
312}
313
314bool
315taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
316{
317  return true;
318}
319
320} // anonymous namespace
321
322/* Internal interface to this file. */
323
324state_machine *
325make_taint_state_machine (logger *logger)
326{
327  return new taint_state_machine (logger);
328}
329
330} // namespace ana
331
332#endif /* #if ENABLE_ANALYZER */
333