aboutsummaryrefslogtreecommitdiffstats
path: root/extract_archive
diff options
context:
space:
mode:
Diffstat (limited to 'extract_archive')
-rwxr-xr-xextract_archive248
1 files changed, 248 insertions, 0 deletions
diff --git a/extract_archive b/extract_archive
new file mode 100755
index 0000000..cdd98da
--- /dev/null
+++ b/extract_archive
@@ -0,0 +1,248 @@
+#!/usr/bin/perl
+
+#- Mandrake Simple Archive Extracter.
+#- Copyright (C) 1999 MandrakeSoft (fpons@mandrakesoft.com)
+#-
+#- This program is free software; you can redistribute it and/or modify
+#- it under the terms of the GNU General Public License as published by
+#- the Free Software Foundation; either version 2, or (at your option)
+#- any later version.
+#-
+#- This program is distributed in the hope that it will be useful,
+#- but WITHOUT ANY WARRANTY; without even the implied warranty of
+#- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#- GNU General Public License for more details.
+#-
+#- You should have received a copy of the GNU General Public License
+#- along with this program; if not, write to the Free Software
+#- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#- Simple cat archive with gzip/bzip2 for perl.
+#- see build_archive for more information.
+#-
+#- uncompressing sheme is:
+#- | |
+#- | | | |
+#- $off1 =|*| } | |
+#- |*| } $off2 =|+| }
+#- |*| } $siz1 => 'gzip/bzip2 -d' => |+| } $siz2 => $filename
+#- |*| } |+| }
+#- |*| } | |
+#- | | | |
+#- | | | |
+#- | |
+
+use strict qw(subs vars refs);
+
+#- used for uncompressing archive and other.
+my %toc_trailer;
+my @data;
+my %data;
+
+#- taken from DrakX common stuff, for conveniance and modified to match our expectation.
+sub dirname { @_ == 1 or die "usage: dirname <name>\n"; local $_ = shift; s|[^/]*/*\s*$||; s|(.)/*$|$1|; $_ || '.' }
+sub basename { @_ == 1 or die "usage: basename <name>\n"; local $_ = shift; s|/*\s*$||; s|.*/||; $_ }
+sub mkdir_ {
+ my $root = dirname $_[0];
+ if (-e $root) {
+ -d $root or die "mkdir: error creating directory $_[0]: $root is a file and i won't delete it\n";
+ } else {
+ mkdir_($root);
+ }
+ -d $_[0] and return;
+ mkdir $_[0], 0755 or die "mkdir: error creating directory $_: $!\n";
+}
+sub symlink_ { mkdir_ dirname($_[1]); unlink $_[1]; symlink $_[0], $_[1] }
+
+#- compute the closure of filename list according to symlinks or directory
+#- contents inside the archive.
+sub compute_closure {
+ my %file;
+ my @file;
+
+ #- keep in mind when a filename already exist and remove doublons.
+ @file{@_} = ();
+
+ #- navigate through filename list to follow symlinks.
+ do {
+ @file = grep { !$file{$_} } keys %file;
+ foreach (@file) {
+ my $file = $_;
+
+ #- keep in mind this one has been processed and does not need
+ #- to be examined again.
+ $file{$file} = 1;
+
+ exists $data{$file} or next;
+
+ for ($data{$file}[0]) {
+ #- on symlink, try to follow it and mark %file if
+ #- it is still inside the archive contents.
+ /l/ && do {
+ my ($source, $target) = ($file, $data{$file}[1]);
+
+ $source =~ s|[^/]*$||; #- remove filename to navigate directory.
+ if ($source) {
+ while ($target =~ s|^\./|| || $target =~ s|//+|/| || $target =~ s|/$|| or
+ $source and $target =~ s|^\.\./|| and $source =~ s|[^/]*/$||) {}
+ }
+
+ #- FALL THROUGH with new selection.
+ $file = $target =~ m|^/| ? $target : $source.$target;
+ };
+
+ #- on directory, try all files on data starting with
+ #- this directory, provided they are not already taken
+ #- into account.
+ /[ld]/ && do {
+ @file{grep { !$file{$_} && m|^$file$| || m|^$file/| } keys %data} = ();
+ last;
+ };
+ }
+ }
+ } while (@file > 0);
+
+ keys %file;
+}
+
+#- read toc at end of archive.
+sub read_toc {
+ my ($file) = @_;
+ my ($toc, $toc_trailer, $toc_size);
+ my @toc_str;
+ my @toc_data;
+
+ local *ARCHIVE;
+ open ARCHIVE, "<$file" or die "cannot open archive file $file\n";
+
+ #- seek to end of file minus 64, size of trailer.
+ #- read toc_trailer, check header/trailer for version 0.
+ seek ARCHIVE, -64, 2;
+ read ARCHIVE, $toc_trailer, 64 or die "cannot read toc_trailer of archive file $file\n";
+ @toc_trailer{qw(header toc_d_count toc_l_count toc_f_count toc_str_size uncompress trailer)} =
+ unpack "a4NNNNZ40a4", $toc_trailer;
+ $toc_trailer{header} eq 'cz[0' && $toc_trailer{trailer} eq '0]cz' or die "bad toc_trailer in archive file $file\n";
+
+ #- read toc, extract data hashes.
+ $toc_size = $toc_trailer{toc_str_size} + 16*$toc_trailer{toc_f_count};
+ seek ARCHIVE, -64-$toc_size, 2;
+
+ #- read strings separated by \n, so this char cannot be inside filename, oops.
+ read ARCHIVE, $toc, $toc_trailer{toc_str_size} or die "cannot read toc of archive file $file\n";
+ @toc_str = split "\n", $toc;
+
+ #- read data for file.
+ read ARCHIVE, $toc, 16*$toc_trailer{toc_f_count} or die "cannot read toc of archive file $file\n";
+ @toc_data = unpack "N". 4*$toc_trailer{toc_f_count}, $toc;
+
+ close ARCHIVE;
+
+ foreach (0..$toc_trailer{toc_d_count}-1) {
+ my $file = $toc_str[$_];
+ push @data, $file;
+ $data{$file} = [ 'd' ];
+ }
+ foreach (0..$toc_trailer{toc_l_count}-1) {
+ my ($file, $symlink) = ($toc_str[$toc_trailer{toc_d_count}+2*$_],
+ $toc_str[$toc_trailer{toc_d_count}+2*$_+1]);
+ push @data, $file;
+ $data{$file} = [ 'l', $symlink ];
+ }
+ foreach (0..$toc_trailer{toc_f_count}-1) {
+ my $file = $toc_str[$toc_trailer{toc_d_count}+2*$toc_trailer{toc_l_count}+$_];
+ push @data, $file;
+ $data{$file} = [ 'f', @toc_data[4*$_ .. 4*$_+3] ];
+ }
+
+ scalar keys %data == $toc_trailer{toc_d_count}+$toc_trailer{toc_l_count}+$toc_trailer{toc_f_count} or
+ die "mismatch count when reading toc, bad archive file $file\n";
+}
+
+sub catsksz {
+ my ($input, $seek, $siz, $output) = @_;
+ my ($buf, $sz);
+
+ while (($sz = sysread($input, $buf, $seek > 4096 ? 4096 : $seek))) {
+ $seek -= $sz;
+ last unless $seek > 0;
+ }
+ while (($sz = sysread($input, $buf, $siz > 4096 ? 4096 : $siz))) {
+ $siz -= $sz;
+ syswrite($output, $buf);
+ last unless $siz > 0;
+ }
+}
+
+sub main {
+ my ($archivename, $dir, @file) = @_;
+ my %extract_table;
+
+ #- update %data according to TOC of archive.
+ read_toc($archivename);
+
+ #- as a special features, if both $dir and $file are empty, list contents of archive.
+ if (!$dir && !@file) {
+ my $count = scalar keys %data;
+ print "$count files in archive, uncompression method is \"$toc_trailer{uncompress}\"\n";
+ foreach my $file (@data) {
+ for ($data{$file}[0]) {
+ /l/ && do { printf "l %13c %s -> %s\n", ' ', $file, $data{$file}[1]; last; };
+ /d/ && do { printf "d %13c %s\n", ' ', $file; last; };
+ /f/ && do { printf "f %12d %s\n", $data{$file}[4], $file; last; };
+ }
+ }
+ exit 0;
+ }
+
+ #- compute closure.
+ @file = compute_closure(@file);
+
+ #- check all file given are in the archive before continuing.
+ foreach (@file) { $data{$_} or die "unable to find file $_ in archive $archivename\n"; }
+
+ foreach my $file (@file) {
+ my $newfile = "$dir/$file";
+
+ print "extracting $file\n";
+ for ($data{$file}[0]) {
+ /l/ && do { symlink_ $data{$file}[1], $newfile; last; };
+ /d/ && do { mkdir_ $newfile; last; };
+ /f/ && do {
+ mkdir_ dirname $newfile;
+ $extract_table{$data{$file}[1]} ||= [ $data{$file}[2], [] ];
+ push @{$extract_table{$data{$file}[1]}[1]}, [ $newfile, $data{$file}[3], $data{$file}[4] ];
+ $extract_table{$data{$file}[1]}[0] == $data{$file}[2] or die "mismatched relocation in toc\n";
+ last;
+ };
+ die "unknown extension $_ when uncompressing archive $archivename\n";
+ }
+ }
+
+ #- delayed extraction is done on each block for a single execution
+ #- of uncompress executable.
+ foreach (sort { $a <=> $b } keys %extract_table) {
+ local *OUTPUT;
+ if (open OUTPUT, "-|") {
+ #- $curr_off is used to handle the reading in a pipe and simulating
+ #- a seek on it as done by catsksz, so last file position is
+ #- last byte not read (ie last block read start + last block read size).
+ my $curr_off = 0;
+ foreach (sort { $a->[1] <=> $b->[1] } @{$extract_table{$_}[1]}) {
+ my ($newfile, $off, $siz) = @$_;
+ local *FILE;
+ open FILE, $dir ? ">$newfile" : ">&STDOUT";
+ catsksz(\*OUTPUT, $off - $curr_off, $siz, \*FILE);
+ $curr_off = $off + $siz;
+ }
+ } else {
+ local *BUNZIP2;
+ open BUNZIP2, "| $toc_trailer{uncompress}";
+ local *ARCHIVE;
+ open ARCHIVE, "<$archivename" or die "cannot open archive $archivename\n";
+ catsksz(\*ARCHIVE, $_, $extract_table{$_}[0], \*BUNZIP2);
+ exit 0;
+ }
+ }
+}
+
+main(@ARGV);