1/* ========================================================================== ** 2 * debugparse.c 3 * 4 * Copyright (C) 1998 by Christopher R. Hertel 5 * 6 * Email: crh@ubiqx.mn.org 7 * 8 * -------------------------------------------------------------------------- ** 9 * This module is a very simple parser for Samba debug log files. 10 * -------------------------------------------------------------------------- ** 11 * 12 * This library is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU Library General Public 14 * License as published by the Free Software Foundation; either 15 * version 2 of the License, or (at your option) any later version. 16 * 17 * This library is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 * Library General Public License for more details. 21 * 22 * You should have received a copy of the GNU Library General Public 23 * License along with this library; if not, write to the Free 24 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 25 * 26 * -------------------------------------------------------------------------- ** 27 * The important function in this module is dbg_char2token(). The rest is 28 * basically fluff. (Potentially useful fluff, but still fluff.) 29 * ========================================================================== ** 30 */ 31 32#include "debugparse.h" 33 34/* -------------------------------------------------------------------------- ** 35 * Constants... 36 * 37 * DBG_BSIZE - This internal constant is used only by dbg_test(). It is the 38 * size of the read buffer. I've tested the function using a 39 * DBG_BSIZE value of 2. 40 */ 41 42#define DBG_BSIZE 128 43 44/* -------------------------------------------------------------------------- ** 45 * Functions... 46 */ 47 48const char *dbg_token2string( dbg_Token tok ) 49 /* ------------------------------------------------------------------------ ** 50 * Given a token, return a string describing the token. 51 * 52 * Input: tok - One of the set of dbg_Tokens defined in debugparse.h. 53 * 54 * Output: A string identifying the token. This is useful for debugging, 55 * etc. 56 * 57 * Note: If the token is not known, this function will return the 58 * string "<unknown>". 59 * 60 * ------------------------------------------------------------------------ ** 61 */ 62 { 63 switch( tok ) 64 { 65 case dbg_null: 66 return( "null" ); 67 case dbg_ignore: 68 return( "ignore" ); 69 case dbg_header: 70 return( "header" ); 71 case dbg_timestamp: 72 return( "time stamp" ); 73 case dbg_level: 74 return( "level" ); 75 case dbg_sourcefile: 76 return( "source file" ); 77 case dbg_function: 78 return( "function" ); 79 case dbg_lineno: 80 return( "line number" ); 81 case dbg_message: 82 return( "message" ); 83 case dbg_eof: 84 return( "[EOF]" ); 85 } 86 return( "<unknown>" ); 87 } /* dbg_token2string */ 88 89dbg_Token dbg_char2token( dbg_Token *state, int c ) 90 /* ------------------------------------------------------------------------ ** 91 * Parse input one character at a time. 92 * 93 * Input: state - A pointer to a token variable. This is used to 94 * maintain the parser state between calls. For 95 * each input stream, you should set up a separate 96 * state variable and initialize it to dbg_null. 97 * Pass a pointer to it into this function with each 98 * character in the input stream. See dbg_test() 99 * for an example. 100 * c - The "current" character in the input stream. 101 * 102 * Output: A token. 103 * The token value will change when delimiters are found, 104 * which indicate a transition between syntactical objects. 105 * Possible return values are: 106 * 107 * dbg_null - The input character was an end-of-line. 108 * This resets the parser to its initial state 109 * in preparation for parsing the next line. 110 * dbg_eof - Same as dbg_null, except that the character 111 * was an end-of-file. 112 * dbg_ignore - Returned for whitespace and delimiters. 113 * These lexical tokens are only of interest 114 * to the parser. 115 * dbg_header - Indicates the start of a header line. The 116 * input character was '[' and was the first on 117 * the line. 118 * dbg_timestamp - Indicates that the input character was part 119 * of a header timestamp. 120 * dbg_level - Indicates that the input character was part 121 * of the debug-level value in the header. 122 * dbg_sourcefile - Indicates that the input character was part 123 * of the sourcefile name in the header. 124 * dbg_function - Indicates that the input character was part 125 * of the function name in the header. 126 * dbg_lineno - Indicates that the input character was part 127 * of the DEBUG call line number in the header. 128 * dbg_message - Indicates that the input character was part 129 * of the DEBUG message text. 130 * 131 * ------------------------------------------------------------------------ ** 132 */ 133 { 134 /* The terminating characters that we see will greatly depend upon 135 * how they are read. For example, if gets() is used instead of 136 * fgets(), then we will not see newline characters. A lot also 137 * depends on the calling function, which may handle terminators 138 * itself. 139 * 140 * '\n', '\0', and EOF are all considered line terminators. The 141 * dbg_eof token is sent back if an EOF is encountered. 142 * 143 * Warning: only allow the '\0' character to be sent if you are 144 * using gets() to read whole lines (thus replacing '\n' 145 * with '\0'). Sending '\0' at the wrong time will mess 146 * up the parsing. 147 */ 148 switch( c ) 149 { 150 case EOF: 151 *state = dbg_null; /* Set state to null (initial state) so */ 152 return( dbg_eof ); /* that we can restart with new input. */ 153 case '\n': 154 case '\0': 155 *state = dbg_null; /* A newline or eoln resets to the null state. */ 156 return( dbg_null ); 157 } 158 159 /* When within the body of the message, only a line terminator 160 * can cause a change of state. We've already checked for line 161 * terminators, so if the current state is dbg_msgtxt, simply 162 * return that as our current token. 163 */ 164 if( dbg_message == *state ) 165 return( dbg_message ); 166 167 /* If we are at the start of a new line, and the input character 168 * is an opening bracket, then the line is a header line, otherwise 169 * it's a message body line. 170 */ 171 if( dbg_null == *state ) 172 { 173 if( '[' == c ) 174 { 175 *state = dbg_timestamp; 176 return( dbg_header ); 177 } 178 *state = dbg_message; 179 return( dbg_message ); 180 } 181 182 /* We've taken care of terminators, text blocks and new lines. 183 * The remaining possibilities are all within the header line 184 * itself. 185 */ 186 187 /* Within the header line, whitespace can be ignored *except* 188 * within the timestamp. 189 */ 190 if( isspace( c ) ) 191 { 192 /* Fudge. The timestamp may contain space characters. */ 193 if( (' ' == c) && (dbg_timestamp == *state) ) 194 return( dbg_timestamp ); 195 /* Otherwise, ignore whitespace. */ 196 return( dbg_ignore ); 197 } 198 199 /* Okay, at this point we know we're somewhere in the header. 200 * Valid header *states* are: dbg_timestamp, dbg_level, 201 * dbg_sourcefile, dbg_function, and dbg_lineno. 202 */ 203 switch( c ) 204 { 205 case ',': 206 if( dbg_timestamp == *state ) 207 { 208 *state = dbg_level; 209 return( dbg_ignore ); 210 } 211 break; 212 case ']': 213 if( dbg_level == *state ) 214 { 215 *state = dbg_sourcefile; 216 return( dbg_ignore ); 217 } 218 break; 219 case ':': 220 if( dbg_sourcefile == *state ) 221 { 222 *state = dbg_function; 223 return( dbg_ignore ); 224 } 225 break; 226 case '(': 227 if( dbg_function == *state ) 228 { 229 *state = dbg_lineno; 230 return( dbg_ignore ); 231 } 232 break; 233 case ')': 234 if( dbg_lineno == *state ) 235 { 236 *state = dbg_null; 237 return( dbg_ignore ); 238 } 239 break; 240 } 241 242 /* If the previous block did not result in a state change, then 243 * return the current state as the current token. 244 */ 245 return( *state ); 246 } /* dbg_char2token */ 247 248void dbg_test( void ); 249void dbg_test( void ) 250 /* ------------------------------------------------------------------------ ** 251 * Simple test function. 252 * 253 * Input: none. 254 * Output: none. 255 * Notes: This function was used to test dbg_char2token(). It reads a 256 * Samba log file from stdin and prints parsing info to stdout. 257 * It also serves as a simple example. 258 * 259 * ------------------------------------------------------------------------ ** 260 */ 261 { 262 char bufr[DBG_BSIZE]; 263 int i; 264 int linecount = 1; 265 dbg_Token old = dbg_null, 266 newtok= dbg_null, 267 state = dbg_null; 268 269 while( fgets( bufr, DBG_BSIZE, stdin ) ) 270 { 271 for( i = 0; bufr[i]; i++ ) 272 { 273 old = newtok; 274 newtok = dbg_char2token( &state, bufr[i] ); 275 switch( newtok ) 276 { 277 case dbg_header: 278 if( linecount > 1 ) 279 (void)putchar( '\n' ); 280 break; 281 case dbg_null: 282 linecount++; 283 break; 284 case dbg_ignore: 285 break; 286 default: 287 if( old != newtok ) 288 (void)printf( "\n[%05d]%12s: ", linecount, dbg_token2string(newtok) ); 289 (void)putchar( bufr[i] ); 290 } 291 } 292 } 293 (void)putchar( '\n' ); 294 } /* dbg_test */ 295 296 297/* -------------------------------------------------------------------------- ** 298 * This simple main line can be uncommented and used to test the parser. 299 */ 300 301/* 302 * int main( void ) 303 * { 304 * dbg_test(); 305 * return( 0 ); 306 * } 307 */ 308 309/* ========================================================================== */ 310