#!/usr/bin/perl

#- Mandrake Simple Archive Extracter.
#- Copyright (C) 1999 MandrakeSoft (fpons@mandrakesoft.com)
#-
#- This program is free software; you can redistribute it and/or modify
#- it under the terms of the GNU General Public License as published by
#- the Free Software Foundation; either version 2, or (at your option)
#- any later version.
#-
#- This program is distributed in the hope that it will be useful,
#- but WITHOUT ANY WARRANTY; without even the implied warranty of
#- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#- GNU General Public License for more details.
#-
#- You should have received a copy of the GNU General Public License
#- along with this program; if not, write to the Free Software
#- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

#- Simple concatenated ("cat") archive compressed with gzip/bzip2, for perl.
#- see build_archive for more information.
#-
#- the uncompressing scheme is:
#-         | |
#-         | |                                        | |
#-  $off1 =|*| }                                      | |
#-         |*| }                               $off2 =|+| }
#-         |*| } $siz1   =>   'gzip/bzip2 -d'   =>    |+| } $siz2  => $filename
#-         |*| }                                      |+| }
#-         |*| }                                      | |
#-         | |                                        | |
#-         | |                                        | |
#-         | |
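
#- on-disk layout, as read back by read_toc below: the archive ends with a
#- 64 byte trailer unpacked as "a4NNNNZ40a4", i.e.
#-   'cz[0'  toc_d_count  toc_l_count  toc_f_count  toc_str_size  uncompress  '0]cz'
#- where uncompress is the command used to inflate the blocks ("gzip -d" or
#- "bzip2 -d").  just before the trailer come toc_str_size bytes of "\n"
#- separated names (directories, then symlink/target pairs, then plain files),
#- followed by four 32 bit numbers per plain file; build_archive is expected
#- to write exactly this layout.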

use strict qw(subs vars refs);

#- global state filled in by read_toc and used to list or extract the archive.
my %toc_trailer;
my @data;
my %data;

#- taken from DrakX common stuff, for convenience, and modified to match our expectations.
sub dirname { @_ == 1 or die "usage: dirname <name>\n"; local $_ = shift; s|[^/]*/*\s*$||; s|(.)/*$|$1|; $_ || '.' }
sub basename { @_ == 1 or die "usage: basename <name>\n"; local $_ = shift; s|/*\s*$||; s|.*/||; $_ }
sub mkdir_ {
    my $root = dirname $_[0];
    if (-e $root) {
	-d $root or die "mkdir: error creating directory $_[0]: $root is a file and I won't delete it\n";
    } else {
	mkdir_($root);
    }
    -d $_[0] and return;
    mkdir $_[0], 0755 or die "mkdir: error creating directory $_[0]: $!\n";
}
sub symlink_ { mkdir_ dirname($_[1]); unlink $_[1]; symlink $_[0], $_[1] }
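
#- for illustration: dirname("a/b/c") gives "a/b", basename("a/b/c") gives "c",
#- mkdir_("a/b/c") creates the whole path (like mkdir -p) and
#- symlink_($target, $name) replaces $name (creating its parent directories)
#- with a symlink pointing to $target.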

#- compute the closure of the filename list, following symlinks and adding
#- the contents of directories found inside the archive.
sub compute_closure {
    my %file;
    my @file;

    #- remember which filenames have already been seen and drop duplicates.
    @file{@_} = ();

    #- navigate through filename list to follow symlinks.
    do {
	@file = grep { !$file{$_} } keys %file;
	foreach (@file) {
	    my $file = $_;

	    #- keep in mind this one has been processed and does not need
	    #- to be examined again.
	    $file{$file} = 1;

	    exists $data{$file} or next;

	    for ($data{$file}[0]) {
		#- on symlink, try to follow it and mark %file if
		#- it is still inside the archive contents.
		/l/ && do {
		    my ($source, $target) = ($file, $data{$file}[1]);

		    $source =~ s|[^/]*$||; #- remove filename to navigate directory.
		    if ($source) {
			while ($target =~ s|^\./|| || $target =~ s|//+|/| || $target =~ s|/$|| or
			       $source and $target =~ s|^\.\./|| and $source =~ s|[^/]*/$||) {}
		    }

		    #- FALL THROUGH with new selection.
		    $file = $target =~ m|^/| ? $target : $source.$target;
		};

		#- on directory, try all files in %data starting with
		#- this directory, provided they are not already taken
		#- into account.
		/[ld]/ && do {
		    @file{grep { !$file{$_} && m|^$file$| || m|^$file/| } keys %data} = ();
		    last;
		};
	    }
	}
    } while (@file > 0);

    keys %file;
}
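
#- for example (hypothetical contents): if the archive holds the symlink
#- "bin/sh" -> "bash", asking for "bin/sh" also pulls in "bin/bash"; asking
#- for the directory "etc" pulls in every entry below "etc/" as well.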

#- read toc at end of archive.
sub read_toc {
    my ($file) = @_;
    my ($toc, $toc_trailer, $toc_size);
    my @toc_str;
    my @toc_data;

    local *ARCHIVE;
    open ARCHIVE, "<$file" or die "cannot open archive file $file\n";

    #- seek to end of file minus 64, size of trailer.
    #- read toc_trailer, check header/trailer for version 0.
    seek ARCHIVE, -64, 2;
    read ARCHIVE, $toc_trailer, 64 or die "cannot read toc_trailer of archive file $file\n";
    @toc_trailer{qw(header toc_d_count toc_l_count toc_f_count toc_str_size uncompress trailer)} =
	unpack "a4NNNNZ40a4", $toc_trailer;
    $toc_trailer{header} eq 'cz[0' && $toc_trailer{trailer} eq '0]cz' or die "bad toc_trailer in archive file $file\n";

    #- read toc, extract data hashes.
    $toc_size = $toc_trailer{toc_str_size} + 16*$toc_trailer{toc_f_count};
    seek ARCHIVE, -64-$toc_size, 2;

    #- read strings separated by \n, so this char cannot be inside filename, oops.
    read ARCHIVE, $toc, $toc_trailer{toc_str_size} or die "cannot read toc of archive file $file\n";
    @toc_str = split "\n", $toc;

    #- read data for file.
    read ARCHIVE, $toc, 16*$toc_trailer{toc_f_count} or die "cannot read toc of archive file $file\n";
    @toc_data = unpack "N". 4*$toc_trailer{toc_f_count}, $toc;

    close ARCHIVE;

    foreach (0..$toc_trailer{toc_d_count}-1) {
	my $file = $toc_str[$_];
	push @data, $file;
	$data{$file} = [ 'd' ];
    }
    foreach (0..$toc_trailer{toc_l_count}-1) {
	my ($file, $symlink) = ($toc_str[$toc_trailer{toc_d_count}+2*$_],
				$toc_str[$toc_trailer{toc_d_count}+2*$_+1]);
	push @data, $file;
	$data{$file} = [ 'l', $symlink ];
    }
    foreach (0..$toc_trailer{toc_f_count}-1) {
	my $file = $toc_str[$toc_trailer{toc_d_count}+2*$toc_trailer{toc_l_count}+$_];
	push @data, $file;
	$data{$file} = [ 'f', @toc_data[4*$_ .. 4*$_+3] ];
    }

    scalar keys %data == $toc_trailer{toc_d_count}+$toc_trailer{toc_l_count}+$toc_trailer{toc_f_count} or
	die "mismatch count when reading toc, bad archive file $file\n";
}
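
#- after read_toc, @data lists the entries in toc order (directories, then
#- symlinks, then plain files) and %data maps each name to one of:
#-   [ 'd' ]                                a directory
#-   [ 'l', $target ]                       a symlink and its target
#-   [ 'f', $off1, $siz1, $off2, $siz2 ]    a plain file: offset/size of its
#-                                          compressed block in the archive and
#-                                          offset/size of the file inside the
#-                                          uncompressed stream (see the scheme
#-                                          at the top of this file).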

#- copy $siz bytes from $input to $output after skipping the first $seek bytes,
#- reading 4K at a time; the skip is done by reading, so it also works on a
#- pipe where a real seek is not possible.
sub catsksz {
    my ($input, $seek, $siz, $output) = @_;
    my ($buf, $sz);

    while (($sz = sysread($input, $buf, $seek > 4096 ? 4096 : $seek))) {
	$seek -= $sz;
	last unless $seek > 0;
    }
    while (($sz = sysread($input, $buf, $siz > 4096 ? 4096 : $siz))) {
	$siz -= $sz;
	syswrite($output, $buf);
	last unless $siz > 0;
    }
}

sub main {
    my ($archivename, $dir, @file) = @_;
    my %extract_table;

    #- update %data according to TOC of archive.
    read_toc($archivename);

    #- as a special feature, if both $dir and the file list are empty, list the contents of the archive.
    if (!$dir && !@file) {
	my $count = scalar keys %data;
	print "$count files in archive, uncompression method is \"$toc_trailer{uncompress}\"\n";
	foreach my $file (@data) {
	    for ($data{$file}[0]) {
		/l/ && do { printf "l %12s %s -> %s\n", '', $file, $data{$file}[1]; last; };
		/d/ && do { printf "d %12s %s\n", '', $file; last; };
		/f/ && do { printf "f %12d %s\n", $data{$file}[4], $file; last; };
	    }
	}
	exit 0;
    }

    #- compute closure.
    @file = compute_closure(@file);

    foreach my $file (@file) {
	#- check for the presence of the file, but do not abort: skip it and continue with the others.
	$data{$file} or do { print STDERR "unable to find file $file in archive $archivename\n"; next };

	my $newfile = "$dir/$file";

	print "extracting $file\n";
	for ($data{$file}[0]) {
	    /l/ && do { symlink_ $data{$file}[1], $newfile; last; };
	    /d/ && do { mkdir_ $newfile; last; };
	    /f/ && do {
		mkdir_ dirname $newfile;
		$extract_table{$data{$file}[1]} ||= [ $data{$file}[2], [] ];
		push @{$extract_table{$data{$file}[1]}[1]}, [ $newfile, $data{$file}[3], $data{$file}[4] ];
		$extract_table{$data{$file}[1]}[0] == $data{$file}[2] or die "mismatched relocation in toc\n";
		last;
	    };
	    die "unknown extension $_ when uncompressing archive $archivename\n";
	}
    }
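
    #- at this point %extract_table groups the selected plain files per
    #- compressed block: $extract_table{$off1} = [ $siz1, [ [ $newfile, $off2, $siz2 ], ... ] ],
    #- so each block below is read and uncompressed only once, however many
    #- files it contains.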

    #- delayed extraction is done one compressed block at a time, with a single
    #- execution of the uncompress executable per block.
    foreach (sort { $a <=> $b } keys %extract_table) {
	local *OUTPUT;
	#- fork: the parent reads the child's stdout through OUTPUT.
	if (open OUTPUT, "-|") {
	    #- $curr_off tracks the current position in the pipe so that catsksz
	    #- can simulate a seek on it; after each file it points to the first
	    #- byte not yet read (i.e. the start of the last block read plus its size).
	    my $curr_off = 0;
	    foreach (sort { $a->[1] <=> $b->[1] } @{$extract_table{$_}[1]}) {
		my ($newfile, $off, $siz) = @$_;
		local *FILE;
		open FILE, $dir ? ">$newfile" : ">&STDOUT";
		catsksz(\*OUTPUT, $off - $curr_off, $siz, \*FILE);
		$curr_off = $off + $siz;
	    }
	} else {
	    #- child: feed the raw compressed block into the uncompress command;
	    #- its decompressed output goes to stdout, i.e. to OUTPUT in the parent.
	    local *BUNZIP2;
	    open BUNZIP2, "| $toc_trailer{uncompress}";
	    local *ARCHIVE;
	    open ARCHIVE, "<$archivename" or die "cannot open archive $archivename\n";
	    catsksz(\*ARCHIVE, $_, $extract_table{$_}[0], \*BUNZIP2);
	    exit 0;
	}
    }
}

main(@ARGV);
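
#- typical invocations (argument handling as implemented by main above;
#- <archive>, <dir> and <file> are placeholders):
#-   extract_archive <archive>                    list the archive contents
#-   extract_archive <archive> <dir> <file> ...   extract the given files (and
#-                                                their closure) under <dir>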