path: root/extract_archive
author     Pascal Rigaux <pixel@mandriva.com>    2000-03-07 23:37:35 +0000
committer  Pascal Rigaux <pixel@mandriva.com>    2000-03-07 23:37:35 +0000
commit     ecad1562e7ef9070a2ecce05c05e0b431dc2045a (patch)
tree       508a3dfefe74d74703174e4f3e7167aa494034db /extract_archive
parent     18a2943fdceb9c1197cdc2b2253db52a94437747 (diff)
no_comment
Diffstat (limited to 'extract_archive')
-rwxr-xr-x  extract_archive  248
1 file changed, 248 insertions, 0 deletions
diff --git a/extract_archive b/extract_archive
new file mode 100755
index 0000000..cdd98da
--- /dev/null
+++ b/extract_archive
@@ -0,0 +1,248 @@
+#!/usr/bin/perl
+
+#- Mandrake Simple Archive Extracter.
+#- Copyright (C) 1999 MandrakeSoft (fpons@mandrakesoft.com)
+#-
+#- This program is free software; you can redistribute it and/or modify
+#- it under the terms of the GNU General Public License as published by
+#- the Free Software Foundation; either version 2, or (at your option)
+#- any later version.
+#-
+#- This program is distributed in the hope that it will be useful,
+#- but WITHOUT ANY WARRANTY; without even the implied warranty of
+#- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#- GNU General Public License for more details.
+#-
+#- You should have received a copy of the GNU General Public License
+#- along with this program; if not, write to the Free Software
+#- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#- Simple cat archive with gzip/bzip2 for perl.
+#- see build_archive for more information.
+#-
+#- uncompressing scheme is:
+#-         | |
+#-         | |                               | |
+#-  $off1 =|*| }                             | |
+#-         |*| }                      $off2 =|+| }
+#-         |*| } $siz1 => 'gzip/bzip2 -d' => |+| } $siz2 => $filename
+#-         |*| }                             |+| }
+#-         |*| }                             | |
+#-         | |                               | |
+#-         | |                               | |
+#-         | |
+
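+#- for each plain file the toc stores ($off1, $siz1, $off2, $siz2): $off1/$siz1
+#- locate the compressed block inside the archive, $off2/$siz2 locate the file
+#- inside the uncompressed output of that block.
+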
+use strict qw(subs vars refs);
+
+#- used for uncompressing the archive and other bookkeeping.
+my %toc_trailer;
+my @data;
+my %data;
+
+#- taken from DrakX common stuff, for convenience, and modified to match our expectations.
+sub dirname { @_ == 1 or die "usage: dirname <name>\n"; local $_ = shift; s|[^/]*/*\s*$||; s|(.)/*$|$1|; $_ || '.' }
+sub basename { @_ == 1 or die "usage: basename <name>\n"; local $_ = shift; s|/*\s*$||; s|.*/||; $_ }
+sub mkdir_ {
+    my $root = dirname $_[0];
+    if (-e $root) {
+        -d $root or die "mkdir: error creating directory $_[0]: $root is a file and I won't delete it\n";
+    } else {
+        mkdir_($root);
+    }
+    -d $_[0] and return;
+    mkdir $_[0], 0755 or die "mkdir: error creating directory $_[0]: $!\n";
+}
+sub symlink_ { mkdir_ dirname($_[1]); unlink $_[1]; symlink $_[0], $_[1] }
+
+#- compute the closure of the filename list according to symlinks or directory
+#- contents inside the archive.
+sub compute_closure {
+    my %file;
+    my @file;
+
+    #- keep in mind when a filename already exists and remove duplicates.
+    @file{@_} = ();
+
+    #- navigate through the filename list to follow symlinks.
+    do {
+        @file = grep { !$file{$_} } keys %file;
+        foreach (@file) {
+            my $file = $_;
+
+            #- keep in mind this one has been processed and does not need
+            #- to be examined again.
+            $file{$file} = 1;
+
+            exists $data{$file} or next;
+
+            for ($data{$file}[0]) {
+                #- on symlink, try to follow it and mark %file if
+                #- it is still inside the archive contents.
+                /l/ && do {
+                    my ($source, $target) = ($file, $data{$file}[1]);
+
+                    $source =~ s|[^/]*$||; #- remove filename to navigate directory.
+                    if ($source) {
+                        #- normalize the target: strip leading "./", collapse "//",
+                        #- drop a trailing "/" and resolve "../" against $source.
+                        while ($target =~ s|^\./|| || $target =~ s|//+|/| || $target =~ s|/$|| or
+                               $source and $target =~ s|^\.\./|| and $source =~ s|[^/]*/$||) {}
+                    }
+
+                    #- FALL THROUGH with new selection.
+                    $file = $target =~ m|^/| ? $target : $source.$target;
+                };
+
+                #- on directory, try all files in %data starting with
+                #- this directory, provided they are not already taken
+                #- into account.
+                /[ld]/ && do {
+                    @file{grep { !$file{$_} && m|^$file$| || m|^$file/| } keys %data} = ();
+                    last;
+                };
+            }
+        }
+    } while (@file > 0);
+
+    keys %file;
+}
+
+#- read toc at end of archive.
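+#- toc layout, as read below: filenames as "\n"-separated strings (directories,
+#- then symlink name/target pairs, then plain files), 16 bytes of per-file data
+#- (four big-endian 32-bit values), and the 64-byte trailer.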
+sub read_toc {
+    my ($file) = @_;
+    my ($toc, $toc_trailer, $toc_size);
+    my @toc_str;
+    my @toc_data;
+
+    local *ARCHIVE;
+    open ARCHIVE, "<$file" or die "cannot open archive file $file\n";
+
+    #- seek to end of file minus 64, size of trailer.
+    #- read toc_trailer, check header/trailer for version 0.
+    seek ARCHIVE, -64, 2;
+    read ARCHIVE, $toc_trailer, 64 or die "cannot read toc_trailer of archive file $file\n";
+    @toc_trailer{qw(header toc_d_count toc_l_count toc_f_count toc_str_size uncompress trailer)} =
+        unpack "a4NNNNZ40a4", $toc_trailer;
+    $toc_trailer{header} eq 'cz[0' && $toc_trailer{trailer} eq '0]cz' or die "bad toc_trailer in archive file $file\n";
+
+    #- read toc, extract data hashes.
+    $toc_size = $toc_trailer{toc_str_size} + 16*$toc_trailer{toc_f_count};
+    seek ARCHIVE, -64-$toc_size, 2;
+
+    #- read strings separated by \n, so this char cannot be inside a filename, oops.
+    read ARCHIVE, $toc, $toc_trailer{toc_str_size} or die "cannot read toc of archive file $file\n";
+    @toc_str = split "\n", $toc;
+
+    #- read data for files.
+    read ARCHIVE, $toc, 16*$toc_trailer{toc_f_count} or die "cannot read toc of archive file $file\n";
+    @toc_data = unpack "N". 4*$toc_trailer{toc_f_count}, $toc;
+
+    close ARCHIVE;
+
+    foreach (0..$toc_trailer{toc_d_count}-1) {
+        my $file = $toc_str[$_];
+        push @data, $file;
+        $data{$file} = [ 'd' ];
+    }
+    foreach (0..$toc_trailer{toc_l_count}-1) {
+        my ($file, $symlink) = ($toc_str[$toc_trailer{toc_d_count}+2*$_],
+                                $toc_str[$toc_trailer{toc_d_count}+2*$_+1]);
+        push @data, $file;
+        $data{$file} = [ 'l', $symlink ];
+    }
+    foreach (0..$toc_trailer{toc_f_count}-1) {
+        my $file = $toc_str[$toc_trailer{toc_d_count}+2*$toc_trailer{toc_l_count}+$_];
+        push @data, $file;
+        $data{$file} = [ 'f', @toc_data[4*$_ .. 4*$_+3] ];
+    }
+
+    scalar keys %data == $toc_trailer{toc_d_count}+$toc_trailer{toc_l_count}+$toc_trailer{toc_f_count} or
+        die "mismatch count when reading toc, bad archive file $file\n";
+}
+
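+#- catsksz: skip $seek bytes of $input, then copy $siz bytes to $output,
+#- reading sequentially in 4096-byte chunks so that $input may be a pipe.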
+sub catsksz {
+    my ($input, $seek, $siz, $output) = @_;
+    my ($buf, $sz);
+
+    while (($sz = sysread($input, $buf, $seek > 4096 ? 4096 : $seek))) {
+        $seek -= $sz;
+        last unless $seek > 0;
+    }
+    while (($sz = sysread($input, $buf, $siz > 4096 ? 4096 : $siz))) {
+        $siz -= $sz;
+        syswrite($output, $buf);
+        last unless $siz > 0;
+    }
+}
+
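+#- main(archivename, dir, files...): with no dir and no files, list the archive
+#- contents; otherwise extract the selected files (and their closure) below dir,
+#- or to stdout when dir is empty.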
+sub main {
+    my ($archivename, $dir, @file) = @_;
+    my %extract_table;
+
+    #- update %data according to TOC of archive.
+    read_toc($archivename);
+
+    #- as a special feature, if both $dir and @file are empty, list the contents of the archive.
+    if (!$dir && !@file) {
+        my $count = scalar keys %data;
+        print "$count files in archive, uncompression method is \"$toc_trailer{uncompress}\"\n";
+        foreach my $file (@data) {
+            for ($data{$file}[0]) {
+                /l/ && do { printf "l %13c %s -> %s\n", ' ', $file, $data{$file}[1]; last; };
+                /d/ && do { printf "d %13c %s\n", ' ', $file; last; };
+                /f/ && do { printf "f %12d %s\n", $data{$file}[4], $file; last; };
+            }
+        }
+        exit 0;
+    }
+
+    #- compute closure.
+    @file = compute_closure(@file);
+
+    #- check all files given are in the archive before continuing.
+    foreach (@file) { $data{$_} or die "unable to find file $_ in archive $archivename\n"; }
+
+    foreach my $file (@file) {
+        my $newfile = "$dir/$file";
+
+        print "extracting $file\n";
+        for ($data{$file}[0]) {
+            /l/ && do { symlink_ $data{$file}[1], $newfile; last; };
+            /d/ && do { mkdir_ $newfile; last; };
+            /f/ && do {
+                mkdir_ dirname $newfile;
+                $extract_table{$data{$file}[1]} ||= [ $data{$file}[2], [] ];
+                push @{$extract_table{$data{$file}[1]}[1]}, [ $newfile, $data{$file}[3], $data{$file}[4] ];
+                $extract_table{$data{$file}[1]}[0] == $data{$file}[2] or die "mismatched relocation in toc\n";
+                last;
+            };
+            die "unknown extension $_ when uncompressing archive $archivename\n";
+        }
+    }
+
+    #- delayed extraction is done on each block for a single execution
+    #- of the uncompress executable.
+    foreach (sort { $a <=> $b } keys %extract_table) {
+        local *OUTPUT;
+        if (open OUTPUT, "-|") {
+            #- $curr_off simulates a seek on the pipe as done by catsksz; the
+            #- current position is the first byte not yet read
+            #- (i.e. last block read start + last block read size).
+            my $curr_off = 0;
+            foreach (sort { $a->[1] <=> $b->[1] } @{$extract_table{$_}[1]}) {
+                my ($newfile, $off, $siz) = @$_;
+                local *FILE;
+                open FILE, $dir ? ">$newfile" : ">&STDOUT";
+                catsksz(\*OUTPUT, $off - $curr_off, $siz, \*FILE);
+                $curr_off = $off + $siz;
+            }
+        } else {
+            local *BUNZIP2;
+            open BUNZIP2, "| $toc_trailer{uncompress}";
+            local *ARCHIVE;
+            open ARCHIVE, "<$archivename" or die "cannot open archive $archivename\n";
+            catsksz(\*ARCHIVE, $_, $extract_table{$_}[0], \*BUNZIP2);
+            exit 0;
+        }
+    }
+}
+
+main(@ARGV);
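
For reference, here is a minimal standalone sketch (not part of the commit) showing how the 64-byte TOC trailer that read_toc() above expects at the end of an archive can be inspected; the script name check_trailer.pl and its messages are illustrative only:

#!/usr/bin/perl
#- check_trailer.pl -- sketch only: dump the trailer fields of a cz[0 archive.
use strict;

my $archive = shift or die "usage: check_trailer.pl <archive>\n";
open my $fh, "<", $archive or die "cannot open $archive: $!\n";
seek $fh, -64, 2 or die "$archive is too small to hold a trailer\n";
read($fh, my $trailer, 64) == 64 or die "cannot read trailer of $archive\n";
close $fh;

#- same layout as read_toc(): magic, three counts, string size, uncompress command, magic.
my %t;
@t{qw(header toc_d_count toc_l_count toc_f_count toc_str_size uncompress trailer)} =
    unpack "a4NNNNZ40a4", $trailer;
$t{header} eq 'cz[0' && $t{trailer} eq '0]cz' or die "$archive has no cz[0 trailer\n";
printf "%d dirs, %d symlinks, %d files, toc strings %d bytes, uncompress with \"%s\"\n",
    @t{qw(toc_d_count toc_l_count toc_f_count toc_str_size uncompress)};

Based on main(@ARGV), the tool itself would typically be invoked as "extract_archive some-archive.cz" to list the contents, or as "extract_archive some-archive.cz /some/dest/dir file..." to extract; the archive, directory and file names here are placeholders.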