1/* ========================================================================== **
2 *                                debugparse.c
3 *
4 * Copyright (C) 1998 by Christopher R. Hertel
5 *
6 * Email: crh@ubiqx.mn.org
7 *
8 * -------------------------------------------------------------------------- **
9 * This module is a very simple parser for Samba debug log files.
10 * -------------------------------------------------------------------------- **
11 *
12 *  This library is free software; you can redistribute it and/or
13 *  modify it under the terms of the GNU Library General Public
14 *  License as published by the Free Software Foundation; either
15 *  version 2 of the License, or (at your option) any later version.
16 *
17 *  This library is distributed in the hope that it will be useful,
18 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
19 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 *  Library General Public License for more details.
21 *
22 *  You should have received a copy of the GNU Library General Public
23 *  License along with this library; if not, write to the Free
24 *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 *
26 * -------------------------------------------------------------------------- **
27 * The important function in this module is dbg_char2token().  The rest is
28 * basically fluff.  (Potentially useful fluff, but still fluff.)
29 * ========================================================================== **
30 */
31
32#include "debugparse.h"
33
34/* -------------------------------------------------------------------------- **
35 * Constants...
36 *
37 *  DBG_BSIZE - This internal constant is used only by dbg_test().  It is the
38 *          size of the read buffer.  I've tested the function using a
39 *          DBG_BSIZE value of 2.
40 */
41
42#define DBG_BSIZE 128
43
44/* -------------------------------------------------------------------------- **
45 * Functions...
46 */
47
48const char *dbg_token2string( dbg_Token tok )
49  /* ------------------------------------------------------------------------ **
50   * Given a token, return a string describing the token.
51   *
52   *  Input:  tok - One of the set of dbg_Tokens defined in debugparse.h.
53   *
54   *  Output: A string identifying the token.  This is useful for debugging,
55   *          etc.
56   *
57   *  Note:   If the token is not known, this function will return the
58   *          string "<unknown>".
59   *
60   * ------------------------------------------------------------------------ **
61   */
62  {
63  switch( tok )
64    {
65    case dbg_null:
66      return( "null" );
67    case dbg_ignore:
68      return( "ignore" );
69    case dbg_header:
70      return( "header" );
71    case dbg_timestamp:
72      return( "time stamp" );
73    case dbg_level:
74      return( "level" );
75    case dbg_sourcefile:
76      return( "source file" );
77    case dbg_function:
78      return( "function" );
79    case dbg_lineno:
80      return( "line number" );
81    case dbg_message:
82      return( "message" );
83    case dbg_eof:
84      return( "[EOF]" );
85    }
86  return( "<unknown>" );
87  } /* dbg_token2string */
88
89dbg_Token dbg_char2token( dbg_Token *state, int c )
90  /* ------------------------------------------------------------------------ **
91   * Parse input one character at a time.
92   *
93   *  Input:  state - A pointer to a token variable.  This is used to
94   *                  maintain the parser state between calls.  For
95   *                  each input stream, you should set up a separate
96   *                  state variable and initialize it to dbg_null.
97   *                  Pass a pointer to it into this function with each
98   *                  character in the input stream.  See dbg_test()
99   *                  for an example.
100   *          c     - The "current" character in the input stream.
101   *
102   *  Output: A token.
103   *          The token value will change when delimiters are found,
104   *          which indicate a transition between syntactical objects.
105   *          Possible return values are:
106   *
107   *          dbg_null        - The input character was an end-of-line.
108   *                            This resets the parser to its initial state
109   *                            in preparation for parsing the next line.
110   *          dbg_eof         - Same as dbg_null, except that the character
111   *                            was an end-of-file.
112   *          dbg_ignore      - Returned for whitespace and delimiters.
113   *                            These lexical tokens are only of interest
114   *                            to the parser.
115   *          dbg_header      - Indicates the start of a header line.  The
116   *                            input character was '[' and was the first on
117   *                            the line.
118   *          dbg_timestamp   - Indicates that the input character was part
119   *                            of a header timestamp.
120   *          dbg_level       - Indicates that the input character was part
121   *                            of the debug-level value in the header.
122   *          dbg_sourcefile  - Indicates that the input character was part
123   *                            of the sourcefile name in the header.
124   *          dbg_function    - Indicates that the input character was part
125   *                            of the function name in the header.
126   *          dbg_lineno      - Indicates that the input character was part
127   *                            of the DEBUG call line number in the header.
128   *          dbg_message     - Indicates that the input character was part
129   *                            of the DEBUG message text.
130   *
131   * ------------------------------------------------------------------------ **
132   */
133  {
134  /* The terminating characters that we see will greatly depend upon
135   * how they are read.  For example, if gets() is used instead of
136   * fgets(), then we will not see newline characters.  A lot also
137   * depends on the calling function, which may handle terminators
138   * itself.
139   *
140   * '\n', '\0', and EOF are all considered line terminators.  The
141   * dbg_eof token is sent back if an EOF is encountered.
142   *
143   * Warning:  only allow the '\0' character to be sent if you are
144   *           using gets() to read whole lines (thus replacing '\n'
145   *           with '\0').  Sending '\0' at the wrong time will mess
146   *           up the parsing.
147   */
148  switch( c )
149    {
150    case EOF:
151      *state = dbg_null;   /* Set state to null (initial state) so */
152      return( dbg_eof );   /* that we can restart with new input.  */
153    case '\n':
154    case '\0':
155      *state = dbg_null;   /* A newline or eoln resets to the null state. */
156      return( dbg_null );
157    }
158
159  /* When within the body of the message, only a line terminator
160   * can cause a change of state.  We've already checked for line
161   * terminators, so if the current state is dbg_msgtxt, simply
162   * return that as our current token.
163   */
164  if( dbg_message == *state )
165    return( dbg_message );
166
167  /* If we are at the start of a new line, and the input character
168   * is an opening bracket, then the line is a header line, otherwise
169   * it's a message body line.
170   */
171  if( dbg_null == *state )
172    {
173    if( '[' == c )
174      {
175      *state = dbg_timestamp;
176      return( dbg_header );
177      }
178    *state = dbg_message;
179    return( dbg_message );
180    }
181
182  /* We've taken care of terminators, text blocks and new lines.
183   * The remaining possibilities are all within the header line
184   * itself.
185   */
186
187  /* Within the header line, whitespace can be ignored *except*
188   * within the timestamp.
189   */
190  if( isspace( c ) )
191    {
192    /* Fudge.  The timestamp may contain space characters. */
193    if( (' ' == c) && (dbg_timestamp == *state) )
194      return( dbg_timestamp );
195    /* Otherwise, ignore whitespace. */
196    return( dbg_ignore );
197    }
198
199  /* Okay, at this point we know we're somewhere in the header.
200   * Valid header *states* are: dbg_timestamp, dbg_level,
201   * dbg_sourcefile, dbg_function, and dbg_lineno.
202   */
203  switch( c )
204    {
205    case ',':
206      if( dbg_timestamp == *state )
207        {
208        *state = dbg_level;
209        return( dbg_ignore );
210        }
211      break;
212    case ']':
213      if( dbg_level == *state )
214        {
215        *state = dbg_sourcefile;
216        return( dbg_ignore );
217        }
218      break;
219    case ':':
220      if( dbg_sourcefile == *state )
221        {
222        *state = dbg_function;
223        return( dbg_ignore );
224        }
225      break;
226    case '(':
227      if( dbg_function == *state )
228        {
229        *state = dbg_lineno;
230        return( dbg_ignore );
231        }
232      break;
233    case ')':
234      if( dbg_lineno == *state )
235        {
236        *state = dbg_null;
237        return( dbg_ignore );
238        }
239      break;
240    }
241
242  /* If the previous block did not result in a state change, then
243   * return the current state as the current token.
244   */
245  return( *state );
246  } /* dbg_char2token */
247
248void dbg_test( void );
249void dbg_test( void )
250  /* ------------------------------------------------------------------------ **
251   * Simple test function.
252   *
253   *  Input:  none.
254   *  Output: none.
255   *  Notes:  This function was used to test dbg_char2token().  It reads a
256   *          Samba log file from stdin and prints parsing info to stdout.
257   *          It also serves as a simple example.
258   *
259   * ------------------------------------------------------------------------ **
260   */
261  {
262  char bufr[DBG_BSIZE];
263  int  i;
264  int  linecount  = 1;
265  dbg_Token old   = dbg_null,
266            newtok= dbg_null,
267            state = dbg_null;
268
269  while( fgets( bufr, DBG_BSIZE, stdin ) )
270    {
271    for( i = 0; bufr[i]; i++ )
272      {
273      old = newtok;
274      newtok = dbg_char2token( &state, bufr[i] );
275      switch( newtok )
276        {
277        case dbg_header:
278          if( linecount > 1 )
279            (void)putchar( '\n' );
280          break;
281        case dbg_null:
282          linecount++;
283          break;
284        case dbg_ignore:
285          break;
286        default:
287          if( old != newtok )
288            (void)printf( "\n[%05d]%12s: ", linecount, dbg_token2string(newtok) );
289          (void)putchar( bufr[i] );
290        }
291      }
292    }
293  (void)putchar( '\n' );
294  } /* dbg_test */
295
296
297/* -------------------------------------------------------------------------- **
298 * This simple main line can be uncommented and used to test the parser.
299 */
300
301/*
302 * int main( void )
303 *  {
304 *  dbg_test();
305 *  return( 0 );
306 *  }
307 */
308
309/* ========================================================================== */
310