1/* An experimental state machine, for tracking exposure of sensitive
2   data (e.g. through logging).
3   Copyright (C) 2019-2020 Free Software Foundation, Inc.
4   Contributed by David Malcolm <dmalcolm@redhat.com>.
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 3, or (at your option)
11any later version.
12
13GCC is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tree.h"
26#include "function.h"
27#include "function.h"
28#include "basic-block.h"
29#include "gimple.h"
30#include "options.h"
31#include "diagnostic-path.h"
32#include "diagnostic-metadata.h"
33#include "function.h"
34#include "analyzer/analyzer.h"
35#include "diagnostic-event-id.h"
36#include "analyzer/analyzer-logging.h"
37#include "analyzer/sm.h"
38#include "analyzer/pending-diagnostic.h"
39
40#if ENABLE_ANALYZER
41
42namespace ana {
43
44namespace {
45
46/* An experimental state machine, for tracking exposure of sensitive
47   data (e.g. through logging).  */
48
49class sensitive_state_machine : public state_machine
50{
51public:
52  sensitive_state_machine (logger *logger);
53
54  bool inherited_state_p () const FINAL OVERRIDE { return true; }
55
56  bool on_stmt (sm_context *sm_ctxt,
57		const supernode *node,
58		const gimple *stmt) const FINAL OVERRIDE;
59
60  void on_condition (sm_context *sm_ctxt,
61		     const supernode *node,
62		     const gimple *stmt,
63		     tree lhs,
64		     enum tree_code op,
65		     tree rhs) const FINAL OVERRIDE;
66
67  bool can_purge_p (state_t s) const FINAL OVERRIDE;
68
69  /* Start state.  */
70  state_t m_start;
71
72  /* State for "sensitive" data, such as a password.  */
73  state_t m_sensitive;
74
75  /* Stop state, for a value we don't want to track any more.  */
76  state_t m_stop;
77
78private:
79  void warn_for_any_exposure (sm_context *sm_ctxt,
80			      const supernode *node,
81			      const gimple *stmt,
82			      tree arg) const;
83};
84
85class exposure_through_output_file
86  : public pending_diagnostic_subclass<exposure_through_output_file>
87{
88public:
89  exposure_through_output_file (const sensitive_state_machine &sm, tree arg)
90  : m_sm (sm), m_arg (arg)
91  {}
92
93  const char *get_kind () const FINAL OVERRIDE
94  {
95    return "exposure_through_output_file";
96  }
97
98  bool operator== (const exposure_through_output_file &other) const
99  {
100    return same_tree_p (m_arg, other.m_arg);
101  }
102
103  bool emit (rich_location *rich_loc) FINAL OVERRIDE
104  {
105    diagnostic_metadata m;
106    /* CWE-532: Information Exposure Through Log Files */
107    m.add_cwe (532);
108    return warning_meta (rich_loc, m,
109			 OPT_Wanalyzer_exposure_through_output_file,
110			 "sensitive value %qE written to output file",
111			 m_arg);
112  }
113
114  label_text describe_state_change (const evdesc::state_change &change)
115    FINAL OVERRIDE
116  {
117    if (change.m_new_state == m_sm.m_sensitive)
118      {
119	m_first_sensitive_event = change.m_event_id;
120	return change.formatted_print ("sensitive value acquired here");
121      }
122    return label_text ();
123  }
124
125  label_text describe_call_with_state (const evdesc::call_with_state &info)
126    FINAL OVERRIDE
127  {
128    if (info.m_state == m_sm.m_sensitive)
129      return info.formatted_print
130	("passing sensitive value %qE in call to %qE from %qE",
131	 info.m_expr, info.m_callee_fndecl, info.m_caller_fndecl);
132    return label_text ();
133  }
134
135  label_text describe_return_of_state (const evdesc::return_of_state &info)
136    FINAL OVERRIDE
137  {
138    if (info.m_state == m_sm.m_sensitive)
139      return info.formatted_print ("returning sensitive value to %qE from %qE",
140				   info.m_caller_fndecl, info.m_callee_fndecl);
141    return label_text ();
142  }
143
144  label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
145  {
146    if (m_first_sensitive_event.known_p ())
147      return ev.formatted_print ("sensitive value %qE written to output file"
148				 "; acquired at %@",
149				 m_arg, &m_first_sensitive_event);
150    else
151      return ev.formatted_print ("sensitive value %qE written to output file",
152				 m_arg);
153  }
154
155private:
156  const sensitive_state_machine &m_sm;
157  tree m_arg;
158  diagnostic_event_id_t m_first_sensitive_event;
159};
160
161/* sensitive_state_machine's ctor.  */
162
163sensitive_state_machine::sensitive_state_machine (logger *logger)
164: state_machine ("sensitive", logger)
165{
166  m_start = add_state ("start");
167  m_sensitive = add_state ("sensitive");
168  m_stop = add_state ("stop");
169}
170
171/* Warn about an exposure at NODE and STMT if ARG is in the "sensitive"
172   state.  */
173
174void
175sensitive_state_machine::warn_for_any_exposure (sm_context *sm_ctxt,
176						const supernode *node,
177						const gimple *stmt,
178						tree arg) const
179{
180  sm_ctxt->warn_for_state (node, stmt, arg, m_sensitive,
181			   new exposure_through_output_file (*this, arg));
182}
183
184/* Implementation of state_machine::on_stmt vfunc for
185   sensitive_state_machine.  */
186
187bool
188sensitive_state_machine::on_stmt (sm_context *sm_ctxt,
189				  const supernode *node,
190				  const gimple *stmt) const
191{
192  if (const gcall *call = dyn_cast <const gcall *> (stmt))
193    if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
194      {
195	if (is_named_call_p (callee_fndecl, "getpass", call, 1))
196	  {
197	    tree lhs = gimple_call_lhs (call);
198	    if (lhs)
199	      sm_ctxt->on_transition (node, stmt, lhs, m_start, m_sensitive);
200	    return true;
201	  }
202	else if (is_named_call_p (callee_fndecl, "fprintf")
203		 || is_named_call_p (callee_fndecl, "printf"))
204	  {
205	    /* Handle a match at any position in varargs.  */
206	    for (unsigned idx = 1; idx < gimple_call_num_args (call); idx++)
207	      {
208		tree arg = gimple_call_arg (call, idx);
209		warn_for_any_exposure (sm_ctxt, node, stmt, arg);
210	      }
211	    return true;
212	  }
213	else if (is_named_call_p (callee_fndecl, "fwrite", call, 4))
214	  {
215	    tree arg = gimple_call_arg (call, 0);
216	    warn_for_any_exposure (sm_ctxt, node, stmt, arg);
217	    return true;
218	  }
219	// TODO: ...etc.  This is just a proof-of-concept at this point.
220      }
221  return false;
222}
223
224void
225sensitive_state_machine::on_condition (sm_context *sm_ctxt ATTRIBUTE_UNUSED,
226				       const supernode *node ATTRIBUTE_UNUSED,
227				       const gimple *stmt ATTRIBUTE_UNUSED,
228				       tree lhs ATTRIBUTE_UNUSED,
229				       enum tree_code op ATTRIBUTE_UNUSED,
230				       tree rhs ATTRIBUTE_UNUSED) const
231{
232  /* Empty.  */
233}
234
235bool
236sensitive_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
237{
238  return true;
239}
240
241} // anonymous namespace
242
243/* Internal interface to this file. */
244
245state_machine *
246make_sensitive_state_machine (logger *logger)
247{
248  return new sensitive_state_machine (logger);
249}
250
251} // namespace ana
252
253#endif /* #if ENABLE_ANALYZER */
254