#!/usr/bin/perl #- Mandrake Simple Archive Extracter. #- Copyright (C) 1999 MandrakeSoft (fpons@mandrakesoft.com) #- #- This program is free software; you can redistribute it and/or modify #- it under the terms of the GNU General Public License as published by #- the Free Software Foundation; either version 2, or (at your option) #- any later version. #- #- This program is distributed in the hope that it will be useful, #- but WITHOUT ANY WARRANTY; without even the implied warranty of #- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #- GNU General Public License for more details. #- #- You should have received a copy of the GNU General Public License #- along with this program; if not, write to the Free Software #- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #- Simple cat archive with gzip/bzip2 for perl. #- see build_archive for more information. #- #- uncompressing sheme is: #- | | #- | | | | #- $off1 =|*| } | | #- |*| } $off2 =|+| } #- |*| } $siz1 => 'gzip/bzip2 -d' => |+| } $siz2 => $filename #- |*| } |+| } #- |*| } | | #- | | | | #- | | | | #- | | #+use strict qw(subs vars refs); #- used for uncompressing archive and other. my %toc_trailer; my @data; my %data; #- taken from DrakX common stuff, for conveniance and modified to match our expectation. sub dirname { @_ == 1 or die "usage: dirname \n"; local $_ = shift; s|[^/]*/*\s*$||; s|(.)/*$|$1|; $_ || '.' } sub basename { @_ == 1 or die "usage: basename \n"; local $_ = shift; s|/*\s*$||; s|.*/||; $_ } sub mkdir_ { my $root = dirname $_[0]; if (-e $root) { -d $root or die "mkdir: error creating directory $_[0]: $root is a file and i won't delete it\n"; } else { mkdir_($root); } -d $_[0] and return; mkdir $_[0], 0755 or die "mkdir: error creating directory $_: $!\n"; } sub symlink_ { mkdir_ dirname($_[1]); unlink $_[1]; symlink $_[0], $_[1] } #- compute the closure of filename list according to symlinks or directory #- contents inside the archive. sub compute_closure { my %file; my @file; #- keep in mind when a filename already exist and remove doublons. @file{@_} = (); #- navigate through filename list to follow symlinks. do { @file = grep { !$file{$_} } keys %file; foreach (@file) { my $file = $_; #- keep in mind this one has been processed and does not need #- to be examined again. $file{$file} = 1; exists $data{$file} or next; for ($data{$file}[0]) { #- on symlink, try to follow it and mark %file if #- it is still inside the archive contents. /l/ && do { my ($source, $target) = ($file, $data{$file}[1]); $source =~ s|[^/]*$||; #- remove filename to navigate directory. if ($source) { while ($target =~ s|^\./|| || $target =~ s|//+|/| || $target =~ s|/$|| or $source and $target =~ s|^\.\./|| and $source =~ s|[^/]*/$||) {} } #- FALL THROUGH with new selection. $file = $target =~ m|^/| ? $target : $source.$target; }; #- on directory, try all files on data starting with #- this directory, provided they are not already taken #- into account. /[ld]/ && do { @file{grep { !$file{$_} && m|^$file$| || m|^$file/| } keys %data} = (); last; }; } } } while (@file > 0); keys %file; } #- read toc at end of archive. sub read_toc { my ($file) = @_; my ($toc, $toc_trailer, $toc_size); my @toc_str; my @toc_data; local *ARCHIVE; open ARCHIVE, "<$file" or die "cannot open archive file $file\n"; #- seek to end of file minus 64, size of trailer. #- read toc_trailer, check header/trailer for version 0. seek ARCHIVE, -64, 2; read ARCHIVE, $toc_trailer, 64 or die "cannot read toc_trailer of archive file $file\n"; @toc_trailer{qw(header toc_d_count toc_l_count toc_f_count toc_str_size uncompress trailer)} = unpack "a4NNNNZ40a4", $toc_trailer; $toc_trailer{header} eq 'cz[0' && $toc_trailer{trailer} eq '0]cz' or die "bad toc_trailer in archive file $file\n"; #- read toc, extract data hashes. $toc_size = $toc_trailer{toc_str_size} + 16*$toc_trailer{toc_f_count}; seek ARCHIVE, -64-$toc_size, 2; #- read strings separated by \n, so this char cannot be inside filename, oops. read ARCHIVE, $toc, $toc_trailer{toc_str_size} or die "cannot read toc of archive file $file\n"; @toc_str = split "\n", $toc; #- read data for file. read ARCHIVE, $toc, 16*$toc_trailer{toc_f_count} or die "cannot read toc of archive file $file\n"; @toc_data = unpack "N". 4*$toc_trailer{toc_f_count}, $toc; close ARCHIVE; foreach (0..$toc_trailer{toc_d_count}-1) { my $file = $toc_str[$_]; push @data, $file; $data{$file} = [ 'd' ]; } foreach (0..$toc_trailer{toc_l_count}-1) { my ($file, $symlink) = ($toc_str[$toc_trailer{toc_d_count}+2*$_], $toc_str[$toc_trailer{toc_d_count}+2*$_+1]); push @data, $file; $data{$file} = [ 'l', $symlink ]; } foreach (0..$toc_trailer{toc_f_count}-1) { my $file = $toc_str[$toc_trailer{toc_d_count}+2*$toc_trailer{toc_l_count}+$_]; push @data, $file; $data{$file} = [ 'f', @toc_data[4*$_ .. 4*$_+3] ]; } scalar keys %data == $toc_trailer{toc_d_count}+$toc_trailer{toc_l_count}+$toc_trailer{toc_f_count} or die "mismatch count when reading toc, bad archive file $file\n"; } sub catsksz { my ($input, $seek, $siz, $output) = @_; my ($buf, $sz); while (($sz = sysread($input, $buf, $seek > 4096 ? 4096 : $seek))) { $seek -= $sz; last unless $seek > 0; } while (($sz = sysread($input, $buf, $siz > 4096 ? 4096 : $siz))) { $siz -= $sz; syswrite($output, $buf); last unless $siz > 0; } } sub main { my ($archivename, $dir, @file) = @_; my %extract_table; #- update %data according to TOC of archive. read_toc($archivename); #- as a special features, if both $dir and $file are empty, list contents of archive. if (!$dir && !@file) { my $count = scalar keys %data; print "$count files in archive, uncompression method is \"$toc_trailer{uncompress}\"\n"; foreach my $file (@data) { for ($data{$file}[0]) { /l/ && do { printf "l %13c %s -> %s\n", ' ', $file, $data{$file}[1]; last; }; /d/ && do { printf "d %13c %s\n", ' ', $file; last; }; /f/ && do { printf "f %12d %s\n", $data{$file}[4], $file; last; }; } } exit 0; } #- compute closure. @file = compute_closure(@file); foreach my $file (@file) { #- check for presence of file, but do not abort, continue with others. $data{$file} or do { print STDERR "unable to find file $file in archive $archivename\n"; next }; my $newfile = "$dir/$file"; print "extracting $file\n"; for ($data{$file}[0]) { /l/ && do { symlink_ $data{$file}[1], $newfile; last; }; /d/ && do { mkdir_ $newfile; last; }; /f/ && do { mkdir_ dirname $newfile; $extract_table{$data{$file}[1]} ||= [ $data{$file}[2], [] ]; push @{$extract_table{$data{$file}[1]}[1]}, [ $newfile, $data{$file}[3], $data{$file}[4] ]; $extract_table{$data{$file}[1]}[0] == $data{$file}[2] or die "mismatched relocation in toc\n"; last; }; die "unknown extension \"$_\" when uncompressing archive $archivename\n"; } } #- delayed extraction is done on each block for a single execution #- of uncompress executable. foreach (sort { $a <=> $b } keys %extract_table) { local *OUTPUT; if (open OUTPUT, "-|") { #- $curr_off is used to handle the reading in a pipe and simulating #- a seek on it as done by catsksz, so last file position is #- last byte not read (ie last block read start + last block read size). my $curr_off = 0; foreach (sort { $a->[1] <=> $b->[1] } @{$extract_table{$_}[1]}) { my ($newfile, $off, $siz) = @$_; local *FILE; open FILE, $dir ? ">$newfile" : ">&STDOUT"; catsksz(\*OUTPUT, $off - $curr_off, $siz, \*FILE); $curr_off = $off + $siz; } } else { local *BUNZIP2; open BUNZIP2, "| $toc_trailer{uncompress}"; local *ARCHIVE; open ARCHIVE, "<$archivename" or die "cannot open archive $archivename\n"; catsksz(\*ARCHIVE, $_, $extract_table{$_}[0], \*BUNZIP2); exit 0; } } } main(@ARGV);