#!/usr/bin/perl #- Mandrake Simple Archive Builder. #- Copyright (C) 1999 MandrakeSoft (fpons@mandrakesoft.com) #- #- This program is free software; you can redistribute it and/or modify #- it under the terms of the GNU General Public License as published by #- the Free Software Foundation; either version 2, or (at your option) #- any later version. #- #- This program is distributed in the hope that it will be useful, #- but WITHOUT ANY WARRANTY; without even the implied warranty of #- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #- GNU General Public License for more details. #- #- You should have received a copy of the GNU General Public License #- along with this program; if not, write to the Free Software #- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #- Simple cat archive with bzip2 for perl. #- read file list and produce an $ARGV[0].cz2 archive file. #- uncompressing sheme is: #- | | #- | | | | #- $off1 =|*| } | | #- |*| } $off2 =|+| } #- |*| } $siz1 => 'bzip2 -d' => |+| } $siz2 => $filename #- |*| } |+| } #- |*| } | | #- | | | | #- | | | | #- | | #- where %data has the following format: #- { 'filename' => [ 'f', $off1, $siz1, $off2, $siz2 ] } #- except for symbolink link where it is: #- { 'filename_symlink' => [ 'l', $symlink_value ] } #- and directory where it is only #- { 'filename_directory' => [ 'd' ] } #- as you can see, there is no owner, group, filemode... an extension could be #- made with 'F' (instead of 'f'), 'L' instead of 'l' for exemple. #- we do not need them as it is used for DrakX for fast archive extraction and #- owner/filemode is for user running only (ie root). #- #- archive file contains concatenation of all bzip2'ed group of files whose #- filenames are on input, #- then a TOC (describing %data, concatenation of toc_line) follow and a #- TOC_TRAILER for summary. #+use strict qw(subs vars refs); #- tempory file used for building. my $tmpdir = $ENV{TMPDIR} || "/tmp"; my $tmpz = "$tmpdir/tmp.z.$$"; sub toc_line { my ($file, $data) = @_; for ($data->[0]) { return(/l/ && pack("anna*", 'l', length($file), length($data->[1]), "$file$data->[1]") || /d/ && pack("ana*", 'd', length($file), $file) || /f/ && pack("anNNNNa*", 'f', length($file), $data->[1], $data->[2], $data->[3], $data->[4], $file) || die "unknown extension $_\n"); } } sub cat_compress { my ($compress, @filenames) = @_; local *F; open F, "| $compress >$tmpz" or die "cannot start \"$compress\"\n"; foreach (@filenames) { my ($buf, $siz, $sz); local *FILE; open FILE, $_ or die "cannot open $_: $!\n"; $siz = -s $_; while (($sz = sysread(FILE, $buf, $siz > 16384 ? 16384 : $siz))) { $siz -= $sz; syswrite(F, $buf); last unless $siz > 0; } close FILE; } close F; -s $tmpz; } sub main { my ($archivename, $maxsiz) = @_; my ($compress, $uncompress, $off1, $siz1, $off2, $siz2) = ('', '', 0, 0, 0, 0); my @filelist = (); my @data = (); my %data = (); die "usage: $0 \n" unless $maxsiz >= 100000; #- guess compress method to use. if ($archivename =~ /\.cz$/) { ($compress, $uncompress) = ("gzip -9", "gzip -d"); } elsif ($archivename =~ /\.cz2$/) { ($compress, $uncompress) = ("bzip2 -9", "bzip2 -d"); } else { die "how to choose a compression which such a filename $archivename\n"; } print STDERR "choosing compression method with \"$compress\" for archive $archivename\n"; unlink "$archivename"; unlink $tmpz; foreach () { chomp; my $file = $_; -e $file or die "unable to find file $file\n"; push @data, $file; #- now symbolic link and directory are supported, extension is #- available with the first field of $data{$file}. if (-l $file) { $data{$file} = [ 'l', readlink $file ]; } elsif (-d $file) { $data{$file} = [ 'd' ]; } else { $siz2 = -s $file; push @filelist, $file; $data{$file} = [ 'f', -1, -1, $off2, $siz2 ]; if ($off2 + $siz2 > $maxsiz) { #- need compression. $siz1 = cat_compress($compress, @filelist); foreach (@filelist) { $data{$_} = [ 'f', $off1, $siz1, $data{$_}[3], $data{$_}[4] ] } system "cat $tmpz >>$archivename"; $off1 += $siz1; $off2 = 0; $siz2 = 0; @filelist = (); } $off2 += $siz2; } } if (scalar @filelist) { $siz1 = cat_compress($compress, @filelist); foreach (@filelist) { $data{$_} = [ 'f', $off1, $siz1, $data{$_}[3], $data{$_}[4] ] } system "cat $tmpz >>$archivename"; $off1 += $siz1; print STDERR "real archive size of $archivename is $off1\n"; } #- produce a TOC directly at the end of the file, follow with #- a trailer with TOC summary and archive summary. local *OUTPUT; open OUTPUT, ">>$archivename"; my ($toc_str, $toc_data) = ('', ''); my @data_d = (); my @data_l = (); my @data_f = (); foreach (@data) { my $file = $_; $data{$file} or die "build_archive: internal error on $_\n"; #- specific according to type. #- with this version, only f has specific data other than strings. for ($data{$file}[0]) { /d/ && do { push @data_d, $file; last; }; /l/ && do { push @data_l, $file; last; }; /f/ && do { push @data_f, $file; $toc_data .= pack("NNNN", $data{$file}[1], $data{$file}[2], $data{$file}[3], $data{$file}[4]); last; }; die "unknown extension $_\n"; } } foreach (@data_d) { $toc_str .= $_ . "\n" } foreach (@data_l) { $toc_str .= $_ . "\n" . $data{$_}[1] . "\n" } foreach (@data_f) { $toc_str .= $_ . "\n" } print OUTPUT $toc_str; print OUTPUT $toc_data; print OUTPUT toc_trailer(scalar(@data_d), scalar(@data_l), scalar(@data_f), length($toc_str), $uncompress); close OUTPUT; } sub toc_trailer { my ($toc_d_count, $toc_l_count, $toc_f_count, $toc_str_size, $uncompress) = @_; #- 'cz[0' is toc_trailer header where 0 is version information, only 0 now. #- '0]cz' is toc_trailer trailer that match the corresponding header for information. return pack "a4NNNNa40a4", 'cz[0', $toc_d_count, $toc_l_count, $toc_f_count, $toc_str_size, $uncompress, '0]cz'; } main(@ARGV); unlink $tmpz;