1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * logresolve 2.0 19 * 20 * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/ 21 * UUNET Canada, April 16, 1995 22 * 23 * Rewritten by David Robinson. (drtr ast.cam.ac.uk) 24 * Rewritten again, and ported to APR by Colm MacCarthaigh 25 * 26 * Usage: logresolve [-s filename] [-c] < access_log > new_log 27 * 28 * Arguments: 29 * -s filename name of a file to record statistics 30 * -c check the DNS for a matching A record for the host. 31 * 32 * Notes: (For historical interest) 33 * 34 * To generate meaningful statistics from an HTTPD log file, it's good 35 * to have the domain name of each machine that accessed your site, but 36 * doing this on the fly can slow HTTPD down. 37 * 38 * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname 39 * resolution off. Before running your stats program, just run your log 40 * file through this program (logresolve) and all of your IP numbers will 41 * be resolved into hostnames (where possible). 42 * 43 * logresolve takes an HTTPD access log (in the COMMON log file format, 44 * or any other format that has the IP number/domain name as the first 45 * field for that matter), and outputs the same file with all of the 46 * domain names looked up. Where no domain name can be found, the IP 47 * number is left in. 48 * 49 * To minimize impact on your nameserver, logresolve has its very own 50 * internal hash-table cache. This means that each IP number will only 51 * be looked up the first time it is found in the log file. 52 * 53 * The -c option causes logresolve to apply the same check as httpd 54 * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP 55 * address, it looks up the IP addresses for the hostname and checks 56 * that one of these matches the original address. 57 */ 58 59#include "apr.h" 60#include "apr_lib.h" 61#include "apr_hash.h" 62#include "apr_getopt.h" 63#include "apr_strings.h" 64#include "apr_file_io.h" 65#include "apr_network_io.h" 66 67#if APR_HAVE_STDLIB_H 68#include <stdlib.h> 69#endif 70 71#define READ_BUF_SIZE 128*1024 72#define WRITE_BUF_SIZE 128*1024 73#define LINE_BUF_SIZE 128*1024 74 75static apr_file_t *errfile; 76static const char *shortname = "logresolve"; 77static apr_hash_t *cache; 78 79/* Statistics */ 80static int cachehits = 0; 81static int cachesize = 0; 82static int entries = 0; 83static int resolves = 0; 84static int withname = 0; 85static int doublefailed = 0; 86static int noreverse = 0; 87 88/* 89 * prints various statistics to output 90 */ 91#define NL APR_EOL_STR 92static void print_statistics (apr_file_t *output) 93{ 94 apr_file_printf(output, "logresolve Statistics:" NL); 95 apr_file_printf(output, "Entries: %d" NL, entries); 96 apr_file_printf(output, " With name : %d" NL, withname); 97 apr_file_printf(output, " Resolves : %d" NL, resolves); 98 99 if (noreverse) { 100 apr_file_printf(output, " - No reverse : %d" NL, 101 noreverse); 102 } 103 104 if (doublefailed) { 105 apr_file_printf(output, " - Double lookup failed : %d" NL, 106 doublefailed); 107 } 108 109 apr_file_printf(output, "Cache hits : %d" NL, cachehits); 110 apr_file_printf(output, "Cache size : %d" NL, cachesize); 111} 112 113/* 114 * usage info 115 */ 116static void usage(void) 117{ 118 apr_file_printf(errfile, 119 "%s -- Resolve IP-addresses to hostnames in Apache log files." NL 120 "Usage: %s [-s STATFILE] [-c]" NL 121 NL 122 "Options:" NL 123 " -s Record statistics to STATFILE when finished." NL 124 NL 125 " -c Perform double lookups when resolving IP addresses." NL, 126 shortname, shortname); 127 exit(1); 128} 129#undef NL 130 131int main(int argc, const char * const argv[]) 132{ 133 apr_file_t * outfile; 134 apr_file_t * infile; 135 apr_getopt_t * o; 136 apr_pool_t * pool; 137 apr_pool_t *pline; 138 apr_status_t status; 139 const char * arg; 140 char * stats = NULL; 141 char * inbuffer; 142 char * outbuffer; 143 char * line; 144 int doublelookups = 0; 145 146 if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) { 147 return 1; 148 } 149 atexit(apr_terminate); 150 151 if (argc) { 152 shortname = apr_filepath_name_get(argv[0]); 153 } 154 155 if (apr_pool_create(&pool, NULL) != APR_SUCCESS) { 156 return 1; 157 } 158 apr_file_open_stderr(&errfile, pool); 159 apr_getopt_init(&o, pool, argc, argv); 160 161 while (1) { 162 char opt; 163 status = apr_getopt(o, "s:c", &opt, &arg); 164 if (status == APR_EOF) { 165 break; 166 } 167 else if (status != APR_SUCCESS) { 168 usage(); 169 } 170 else { 171 switch (opt) { 172 case 'c': 173 if (doublelookups) { 174 usage(); 175 } 176 doublelookups = 1; 177 break; 178 case 's': 179 if (stats) { 180 usage(); 181 } 182 stats = apr_pstrdup(pool, arg); 183 break; 184 } /* switch */ 185 } /* else */ 186 } /* while */ 187 188 apr_file_open_stdout(&outfile, pool); 189 apr_file_open_stdin(&infile, pool); 190 191 /* Allocate two new 10k file buffers */ 192 if ( (outbuffer = apr_palloc(pool, WRITE_BUF_SIZE)) == NULL 193 || (inbuffer = apr_palloc(pool, READ_BUF_SIZE)) == NULL 194 || (line = apr_palloc(pool, LINE_BUF_SIZE)) == NULL) { 195 return 1; 196 } 197 198 /* Set the buffers */ 199 apr_file_buffer_set(infile, inbuffer, READ_BUF_SIZE); 200 apr_file_buffer_set(outfile, outbuffer, WRITE_BUF_SIZE); 201 202 cache = apr_hash_make(pool); 203 if(apr_pool_create(&pline, pool) != APR_SUCCESS){ 204 return 1; 205 } 206 207 while (apr_file_gets(line, LINE_BUF_SIZE, infile) == APR_SUCCESS) { 208 char *hostname; 209 char *space; 210 apr_sockaddr_t *ip; 211 apr_sockaddr_t *ipdouble; 212 char dummy[] = " " APR_EOL_STR; 213 214 if (line[0] == '\0') { 215 continue; 216 } 217 218 /* Count our log entries */ 219 entries++; 220 221 /* Check if this could even be an IP address */ 222 if (!apr_isxdigit(line[0]) && line[0] != ':') { 223 withname++; 224 apr_file_puts(line, outfile); 225 continue; 226 } 227 228 /* Terminate the line at the next space */ 229 if ((space = strchr(line, ' ')) != NULL) { 230 *space = '\0'; 231 } 232 else { 233 space = dummy; 234 } 235 236 /* See if we have it in our cache */ 237 hostname = (char *) apr_hash_get(cache, line, APR_HASH_KEY_STRING); 238 if (hostname) { 239 apr_file_printf(outfile, "%s %s", hostname, space + 1); 240 cachehits++; 241 continue; 242 } 243 244 /* Parse the IP address */ 245 status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC, 0, 0, pline); 246 if (status != APR_SUCCESS) { 247 /* Not an IP address */ 248 withname++; 249 *space = ' '; 250 apr_file_puts(line, outfile); 251 continue; 252 } 253 254 /* This does not make much sense, but historically "resolves" means 255 * "parsed as an IP address". It does not mean we actually resolved 256 * the IP address into a hostname. 257 */ 258 resolves++; 259 260 /* From here on our we cache each result, even if it was not 261 * succesful 262 */ 263 cachesize++; 264 265 /* Try and perform a reverse lookup */ 266 status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS; 267 if (status || hostname == NULL) { 268 /* Could not perform a reverse lookup */ 269 *space = ' '; 270 apr_file_puts(line, outfile); 271 noreverse++; 272 273 /* Add to cache */ 274 *space = '\0'; 275 apr_hash_set(cache, line, APR_HASH_KEY_STRING, 276 apr_pstrdup(apr_hash_pool_get(cache), line)); 277 continue; 278 } 279 280 /* Perform a double lookup */ 281 if (doublelookups) { 282 /* Do a forward lookup on our hostname, and see if that matches our 283 * original IP address. 284 */ 285 status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0, 286 0, pline); 287 if (status == APR_SUCCESS || 288 memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) { 289 /* Double-lookup failed */ 290 *space = ' '; 291 apr_file_puts(line, outfile); 292 doublefailed++; 293 294 /* Add to cache */ 295 *space = '\0'; 296 apr_hash_set(cache, line, APR_HASH_KEY_STRING, 297 apr_pstrdup(apr_hash_pool_get(cache), line)); 298 continue; 299 } 300 } 301 302 /* Outout the resolved name */ 303 apr_file_printf(outfile, "%s %s", hostname, space + 1); 304 305 /* Store it in the cache */ 306 apr_hash_set(cache, line, APR_HASH_KEY_STRING, 307 apr_pstrdup(apr_hash_pool_get(cache), hostname)); 308 309 apr_pool_clear(pline); 310 } 311 312 /* Flush any remaining output */ 313 apr_file_flush(outfile); 314 315 if (stats) { 316 apr_file_t *statsfile; 317 if (apr_file_open(&statsfile, stats, 318 APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE, 319 APR_OS_DEFAULT, pool) != APR_SUCCESS) { 320 apr_file_printf(errfile, "%s: Could not open %s for writing.", 321 shortname, stats); 322 return 1; 323 } 324 print_statistics(statsfile); 325 apr_file_close(statsfile); 326 } 327 328 return 0; 329} 330