ptargrep revision 1.2
1#!/usr/bin/perl
2##############################################################################
3# Tool for using regular expressions against the contents of files in a tar
4# archive.  See 'ptargrep --help' for more documentation.
5#
6
7BEGIN { pop @INC if $INC[-1] eq '.' }
8use strict;
9use warnings;
10
11use Pod::Usage   qw(pod2usage);
12use Getopt::Long qw(GetOptions);
13use Archive::Tar qw();
14use File::Path   qw(mkpath);
15
16my(%opt, $pattern);
17
18if(!GetOptions(\%opt,
19    'basename|b',
20    'ignore-case|i',
21    'list-only|l',
22    'verbose|v',
23    'help|?',
24)) {
25    pod2usage(-exitval => 1,  -verbose => 0);
26}
27
28
29pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help};
30
31pod2usage(-exitval => 1,  -verbose => 0,
32    -message => "No pattern specified",
33) unless @ARGV;
34make_pattern( shift(@ARGV) );
35
36pod2usage(-exitval => 1,  -verbose => 0,
37    -message => "No tar files specified",
38) unless @ARGV;
39
40process_archive($_) foreach @ARGV;
41
42exit 0;
43
44
45sub make_pattern {
46    my($pat) = @_;
47
48    if($opt{'ignore-case'}) {
49        $pattern = qr{(?im)$pat};
50    }
51    else {
52        $pattern = qr{(?m)$pat};
53    }
54}
55
56
57sub process_archive {
58    my($filename) = @_;
59
60    _log("Processing archive: $filename");
61    my $next = Archive::Tar->iter($filename);
62    while( my $f = $next->() ) {
63        next unless $f->is_file;
64        match_file($f) if $f->size > 0;
65    }
66}
67
68
69sub match_file {
70    my($f)   = @_;
71    my $path = $f->name;
72
73    _log("filename: %s  (%d bytes)", $path, $f->size);
74
75    my $body = $f->get_content();
76    if($body !~ $pattern) {
77        _log("  no match");
78        return;
79    }
80
81    if($opt{'list-only'}) {
82        print $path, "\n";
83        return;
84    }
85
86    save_file($path, $body);
87}
88
89
90sub save_file {
91    my($path, $body) = @_;
92
93    _log("  found match - extracting");
94    my($fh);
95    my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z};
96    if($dir and not $opt{basename}) {
97        _log("  writing to $dir/$file");
98        $dir =~ s{\A/}{./};
99        mkpath($dir) unless -d $dir;
100        open $fh, '>', "$dir/$file" or die "open($dir/$file): $!";
101    }
102    else {
103        _log("  writing to ./$file");
104        open $fh, '>', $file or die "open($file): $!";
105    }
106    print $fh $body;
107    close($fh);
108}
109
110
111sub _log {
112    return unless $opt{verbose};
113    my($format, @args) = @_;
114    warn sprintf($format, @args) . "\n";
115}
116
117
118__END__
119
120=head1 NAME
121
122ptargrep - Apply pattern matching to the contents of files in a tar archive
123
124=head1 SYNOPSIS
125
126  ptargrep [options] <pattern> <tar file> ...
127
128  Options:
129
130   --basename|-b     ignore directory paths from archive
131   --ignore-case|-i  do case-insensitive pattern matching
132   --list-only|-l    list matching filenames rather than extracting matches
133   --verbose|-v      write debugging message to STDERR
134   --help|-?         detailed help message
135
136=head1 DESCRIPTION
137
138This utility allows you to apply pattern matching to B<the contents> of files
139contained in a tar archive.  You might use this to identify all files in an
140archive which contain lines matching the specified pattern and either print out
141the pathnames or extract the files.
142
143The pattern will be used as a Perl regular expression (as opposed to a simple
144grep regex).
145
146Multiple tar archive filenames can be specified - they will each be processed
147in turn.
148
149=head1 OPTIONS
150
151=over 4
152
153=item B<--basename> (alias -b)
154
155When matching files are extracted, ignore the directory path from the archive
156and write to the current directory using the basename of the file from the
157archive.  Beware: if two matching files in the archive have the same basename,
158the second file extracted will overwrite the first.
159
160=item B<--ignore-case> (alias -i)
161
162Make pattern matching case-insensitive.
163
164=item B<--list-only> (alias -l)
165
166Print the pathname of each matching file from the archive to STDOUT.  Without
167this option, the default behaviour is to extract each matching file.
168
169=item B<--verbose> (alias -v)
170
171Log debugging info to STDERR.
172
173=item B<--help> (alias -?)
174
175Display this documentation.
176
177=back
178
179=head1 COPYRIGHT
180
181Copyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt>
182
183This program is free software; you can redistribute it and/or modify it
184under the same terms as Perl itself.
185
186=cut
187
188
189
190