1#!/usr/bin/perl
2#
3# This script parses the default logfile format produced by rsync when running
4# as a daemon with transfer logging enabled.  It also parses a slightly tweaked
5# version of the default format where %o has been replaced with %i.
6#
7# This script is derived from the xferstats script that comes with wuftpd.  See
8# the usage message at the bottom for the options it takes.
9#
10# Andrew Tridgell, October 1998
11
12use Getopt::Long;
13
# You may wish to edit the next line to customize for your default log file.
$usage_file = "/var/log/rsyncd.log";

# Edit the following lines for default report settings.
# Entries defined here will be overridden by the command line.

$hourly_report = 0;
$domain_report = 0;
$total_report = 0;
$depth_limit = 9999;
$only_section = '';

# Let single-letter options be bundled together (e.g. "-hdt").
Getopt::Long::Configure('bundling');

# Every option has a long and a one-letter spelling.  ":s" and ":i" mark an
# optional string / integer value.  If parsing fails, usage() prints the help
# text and terminates the script.
usage() if !GetOptions(
    'hourly-report|h' => \$hourly_report,
    'domain-report|d' => \$domain_report,
    'domain|D:s' => \$only_domain,
    'total-report|t' => \$total_report,
    'depth-limit|l:i' => \$depth_limit,
    'real|r' => \$real,
    'anon|a' => \$anon,
    'section|s:s' => \$only_section,
    'file|f:s' => \$usage_file,
);

# --anon is switched on when neither --real nor --anon was given.  Nothing
# else in the visible script reads $real or $anon.
$anon = 1 if !$real && !$anon;

# Three-argument open: the file name comes from the command line, and with the
# two-argument form characters such as '>' or '|' in it would be interpreted as
# an open mode instead of being part of the name.  $! is included so the
# reason for a failure (missing file, permissions, ...) is reported.
open(LOG, '<', $usage_file)
    or die "Error opening usage log file: $usage_file: $!\n";
42
# Tell the reader up front when the totals below cover only one domain ...
print "Transfer Totals include the '$only_domain' domain only.\n",
      "All other domains are filtered out for this report.\n\n"
    if $only_domain;

# ... and/or only one archive section.
print "Transfer Totals include the '$only_section' section only.\n",
      "All other sections are filtered out for this report.\n\n"
    if $only_section;
52
# Main parsing loop: read the log one line at a time, skip anything that is
# not a transfer record, and add each record to the per-day, per-hour,
# per-path-group and per-domain counters used by the reports further down.
while (<LOG>) {

   # A transfer record starts with a syslog-style ("Mon DD") or rsyncd-style
   # ("YYYY/MM/DD") date, a time and a "[pid]" tag, followed by the fields
   # captured below.  Lines that do not fit this shape are skipped.
   next unless ($day,$time,$op,$host,$module,$file,$bytes)
      = m{^
          ( \w\w\w\s+\d+ | \d+/\d\d/\d\d ) \s+ # day
          (\d\d:\d\d:\d\d) \s+                 # time
          [^[]* \[\d+\]:? \s+                  # pid (ignored)
          (send|recv|[<>]f\S+) \s+             # op (%o or %i)
          (\S+) \s+                            # host
          \[ [0-9A-Fa-f:.]+ \] \s+             # IP, v4 or v6 (ignored)
          (\S+) \s+                            # module
          \(\S*\) \s+                          # user (ignored)
          (.*) \s+                             # file name
          (\d+)                                # file length in bytes
          $ }x;

   # With the %i variant the op is an itemized string starting with '>' or
   # '<'; fold it into the same send/recv wording that %o produces.
   # TODO actually divide the data into send/recv categories
   if ($op =~ /^>/) {
      $op = 'send';
   } elsif ($op =~ /^</) {
      $op = 'recv';
   }

   $daytime = $day;
   $hour = substr($time,0,2);

   # Prefix the module name so that sections are grouped per module, then
   # collapse any doubled slashes.
   $file = $module . "/" . $file;
   $file =~ s|//|/|mg;

   @path = split(/\//, $file);

   # The section key is the leading path components, at most
   # $depth_limit + 1 of them, each preceded by "/".
   $pathkey = "";
   for ($i=0; $i <= $#path && $i <= $depth_limit; $i++) {
      $pathkey = $pathkey . "/" . $path[$i];
   }

   # -s SECTION: keep only records whose section key starts with SECTION.
   if ($only_section ne '') {
      next unless (substr($pathkey,0,length($only_section)) eq $only_section);
   }

   $host =~ tr/A-Z/a-z/;

   @address = split(/\./, $host);

   # The domain is the last dot-separated label of the host name.  A host
   # whose first label is numeric (e.g. a bare IP address) or that has fewer
   # than three labels is lumped under "unresolved".
   $domain = $address[$#address];
   if ( int($address[0]) > 0 || $#address < 2 )
      { $domain = "unresolved"; }

   # -D DOMAIN: keep only records whose domain starts with DOMAIN.
   if ($only_domain ne '') {
      next unless (substr($domain,0,length($only_domain)) eq $only_domain);
   }


#   printf("c=%d day=%s bytes=%d file=%s path=%s\n",
#	   $#line, $daytime, $bytes, $file, $pathkey);

   $xferfiles++;                                # total files sent
   $xfertfiles++;                               # total files sent
   $xferfiles{$daytime}++;                      # files per day
   $groupfiles{$pathkey}++;                     # per-group accesses
   $domainfiles{$domain}++;                     # files per domain

   $xferbytes{$daytime}   += $bytes;            # bytes per day
   $domainbytes{$domain}  += $bytes;            # xmit bytes to domain
   $xferbytes             += $bytes;            # total bytes sent
   $groupbytes{$pathkey}  += $bytes;            # per-group bytes sent

   $xfertfiles{$hour}++;                        # files per hour
   $xfertbytes{$hour}     += $bytes;            # bytes per hour
   $xfertbytes            += $bytes;            # total bytes sent
}
close LOG;
129
# Summary totals followed by the per-day table.  The day keys are sorted once
# here and the same list is reused for the period line, the daily averages and
# the table rows.
#@syslist = keys %systemfiles;
@dates = sort datecompare keys %xferbytes;

if ($xferfiles == 0) {die "There was no data to process.\n";}


print "TOTALS FOR SUMMARY PERIOD ", $dates[0], " TO ", $dates[$#dates], "\n\n";
printf("Files Transmitted During Summary Period  %12.0f\n", $xferfiles);
printf("Bytes Transmitted During Summary Period  %12.0f\n", $xferbytes);
#printf("Systems Using Archives                   %12.0f\n\n", $#syslist+1);

# @dates is non-empty here: at least one record was counted above.
printf("Average Files Transmitted Daily          %12.0f\n",
   $xferfiles / ($#dates + 1));
printf("Average Bytes Transmitted Daily          %12.0f\n",
   $xferbytes / ($#dates + 1));

format top1 =

Daily Transmission Statistics

                 Number Of    Number of   Percent Of  Percent Of
     Date        Files Sent   MB  Sent    Files Sent  Bytes Sent
---------------  ----------  -----------  ----------  ----------
.

format line1 =
@<<<<<<<<<<<<<<  @>>>>>>>>>  @>>>>>>>>>>  @>>>>>>>    @>>>>>>>  
$date,           $nfiles,    $nbytes/(1024*1024), $pctfiles,  $pctbytes
.

# Select the header and row formats used by write() on the current handle.
$^ = 'top1';
$~ = 'line1';

# The row variables must stay package globals: the formats above refer to
# them by name.
foreach $date (@dates) {

   $nfiles   = $xferfiles{$date};
   $nbytes   = $xferbytes{$date};
   $pctfiles = sprintf("%8.2f", 100 * $nfiles / $xferfiles);
   # The byte total is 0 when every logged file was zero-length; report 0%
   # instead of dying with a division by zero.
   $pctbytes = sprintf("%8.2f", $xferbytes ? 100 * $nbytes / $xferbytes : 0);
   write;
}
171
# -t: table of files and bytes per archive section, largest byte count first.
# NOTE(review): the percentages are formatted with "%8.2f" but the picture
# fields are only five columns wide, so they appear to be cut to their first
# five characters -- confirm whether that is intended.
if ($total_report) {
format top2 =

Total Transfers from each Archive Section (By bytes)

                                                           - Percent -
     Archive Section                   NFiles     MB      Files   Bytes
------------------------------------- ------- ----------- ----- -------
.

format line2 =
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @>>>>>> @>>>>>>>>>> @>>>>   @>>>>
$section,                 $files,    $bytes/(1024*1024),     $pctfiles, $pctbytes
.

# Clamp the grand totals to at least 1 *before* they are used as divisors
# below (previously this happened only after the loop had already divided by
# them).  The clamped values stay in effect for any report that follows.
if ( $xferfiles < 1 ) { $xferfiles = 1; }
if ( $xferbytes < 1 ) { $xferbytes = 1; }

$| = 1;          # unbuffered output
$- = 0;          # no lines left on the page: next write starts with the header
$^ = 'top2';
$~ = 'line2';

# The row variables must stay package globals: the formats above refer to
# them by name.
foreach $section (sort bytecompare keys %groupfiles) {

   $files = $groupfiles{$section};
   $bytes = $groupbytes{$section};
   $pctbytes = sprintf("%8.2f", 100 * $bytes / $xferbytes);
   $pctfiles = sprintf("%8.2f", 100 * $files / $xferfiles);
   write;

}
}
205
# -d: table of files and bytes per client domain, ordered by domnamcompare
# (shorter names first, then alphabetically).
if ($domain_report) {
format top3 =

Total Transfer Amount By Domain

             Number Of    Number of    Percent Of  Percent Of
Domain Name  Files Sent    MB Sent     Files Sent  Bytes Sent
-----------  ----------  ------------  ----------  ----------
.

format line3 =
@<<<<<<<<<<  @>>>>>>>>>  @>>>>>>>>>>>  @>>>>>>>    @>>>>>>>  
$domain,     $files,     $bytes/(1024*1024), $pctfiles,  $pctbytes
.

$- = 0;          # no lines left on the page: next write starts with the header
$^ = 'top3';
$~ = 'line3';

# The row variables must stay package globals: the formats above refer to
# them by name.
foreach $domain (sort domnamcompare keys %domainfiles) {

   $files = $domainfiles{$domain};
   $bytes = $domainbytes{$domain};
   # A zero total (e.g. only zero-length files were logged) yields 0% rather
   # than a fatal division by zero.
   $pctfiles = sprintf("%8.2f", $xferfiles ? 100 * $files / $xferfiles : 0);
   $pctbytes = sprintf("%8.2f", $xferbytes ? 100 * $bytes / $xferbytes : 0);
   write;

}

}
238
# -h: table of files and bytes per hour of day (the two-digit hour taken from
# each record's timestamp), then end the script.
if ($hourly_report) {

format top8 =

Hourly Transmission Statistics

                 Number Of    Number of   Percent Of  Percent Of
     Time        Files Sent    MB  Sent   Files Sent  Bytes Sent
---------------  ----------  -----------  ----------  ----------
.

format line8 =
@<<<<<<<<<<<<<<  @>>>>>>>>>  @>>>>>>>>>>  @>>>>>>>    @>>>>>>>  
$hour,           $nfiles,    $nbytes/(1024*1024), $pctfiles,  $pctbytes
.


$| = 1;          # unbuffered output
$- = 0;          # no lines left on the page: next write starts with the header
$^ = 'top8';
$~ = 'line8';

# Hours are two-digit strings, so the default string sort is chronological.
# The row variables must stay package globals: the formats above refer to
# them by name.
foreach $hour (sort keys %xfertbytes) {

   $nfiles   = $xfertfiles{$hour};
   $nbytes   = $xfertbytes{$hour};
   # A zero total (e.g. only zero-length files were logged) yields 0% rather
   # than a fatal division by zero.
   $pctfiles = sprintf("%8.2f", $xferfiles ? 100 * $nfiles / $xferfiles : 0);
   $pctbytes = sprintf("%8.2f", $xferbytes ? 100 * $nbytes / $xferbytes : 0);
   write;
}
}
exit(0);
271
# Sort comparator for the per-day keys ("YYYY/MM/DD" or syslog "Mon DD").
# Returns a negative, zero or positive value as sort() requires; the former
# "$a gt $b" only ever returned true/false, which is not a valid ordering.
# Syslog-style keys are compared by month number and day so that they come
# out in calendar order instead of alphabetical month order.
sub datecompare {
    return _date_sort_key($a) cmp _date_sort_key($b);
}

# Map a day key to a string that sorts chronologically: "Oct  5" -> "10/05".
# Keys in any other shape (including "YYYY/MM/DD") are returned unchanged.
sub _date_sort_key {
    my ($day) = @_;
    if ($day =~ /^([A-Z][a-z][a-z])\s+(\d+)$/) {
        my ($month, $mday) = ($1, $2);
        # Position of the abbreviation in this string gives the month number.
        my $pos = index('JanFebMarAprMayJunJulAugSepOctNovDec', $month);
        return sprintf('%02d/%02d', $pos / 3 + 1, $mday)
            if $pos >= 0 && $pos % 3 == 0;
    }
    return $day;
}
275
# Sort comparator for domain names: shorter names first, names of equal
# length in ascending string order.  Uses the built-in three-way operators
# instead of a hand-written ternary chain and a global scratch variable.
sub domnamcompare {

   return length($a) <=> length($b) || $a cmp $b;

}
282
# Sort comparator for section keys: largest byte count in %groupbytes first,
# ties broken by ascending key.  Uses the built-in three-way operators
# instead of a hand-written ternary chain and a global scratch variable.
sub bytecompare {

   return $groupbytes{$b} <=> $groupbytes{$a} || $a cmp $b;

}
289
# Sort comparator on %fac: largest value first, ties broken by ascending key.
# Nothing in the visible script fills %fac or calls this comparator; it is
# kept in step with bytecompare.  Uses the built-in three-way operators
# instead of a hand-written ternary chain and a global scratch variable.
sub faccompare {

   return $fac{$b} <=> $fac{$a} || $a cmp $b;

}
296
# Abort the script with the command-line help text.  The message ends in a
# newline, so die() does not append a file/line suffix.
sub usage
{
    # Single-quoted here-doc: the text contains nothing to interpolate.
    my $help_text = <<'EOT';
USAGE: rsyncstats [options]

OPTIONS:
  -f FILENAME   Use FILENAME for the log file.
  -h            Include report on hourly traffic.
  -d            Include report on domain traffic.
  -t            Report on total traffic by section.
  -D DOMAIN     Report only on traffic from DOMAIN.
  -l DEPTH      Set DEPTH of path detail for sections.
  -s SECTION    Set SECTION to report on. For example, "-s /pub"
                will report only on paths under "/pub".
EOT
    die $help_text;
}
313