From d8314d9dd12f1bc40960ee9a498ded4ed9e917a3 Mon Sep 17 00:00:00 2001 From: Pascal Rigaux Date: Fri, 11 Jan 2008 17:26:52 +0000 Subject: - 5.0 - urpmf, urpmq: o use xml info instead of hdlist when possible o "urpmq -l" is faster (3x) o "urpmf -l" is slower (1.5x) o "urpmf --sourcerpm" is much faster o see "xml-info" option in urpmi.cfg(5) to see when those files are downloaded o new require: perl module XML::LibXML - urpmq: o use rpm file instead of hdlist/xml-info when file is local o use URPM::Package->changelogs (need perl-URPM 3.06) - urpmi.update, urpmi.addmedia, urpmi.removemedia: o drop hdlist support replaced with xml media_info (this imply file-deps are correctly handled, see genhdlist2(1)) - urpmi.addmedia o new --xml-info option --- .perl_checker | 1 + MANIFEST | 18 +++++ NEWS | 18 ++++- pod/urpmi.addmedia.8.pod | 6 ++ pod/urpmi.cfg.5.pod | 25 +++++++ t/01compile.t | 2 +- urpm.pm | 22 ++++++- urpm/args.pm | 1 + urpm/cfg.pm | 1 + urpm/media.pm | 158 +++++++++++++++++++++++++++++++++++++++----- urpm/xml_info.pm | 168 +++++++++++++++++++++++++++++++++++++++++++++++ urpm/xml_info_pkg.pm | 46 +++++++++++++ urpmf | 93 ++++++++++++++++++++++---- urpmi.addmedia | 10 +++ urpmq | 55 ++++++++-------- 15 files changed, 561 insertions(+), 63 deletions(-) create mode 100644 urpm/xml_info.pm create mode 100644 urpm/xml_info_pkg.pm diff --git a/.perl_checker b/.perl_checker index 1e56591a..5f46df51 100644 --- a/.perl_checker +++ b/.perl_checker @@ -16,3 +16,4 @@ Digest::MD5 Time::HiRes open encoding +XML::LibXML::Reader diff --git a/MANIFEST b/MANIFEST index c6903688..289d057a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -99,9 +99,11 @@ t/01compile.t t/02create_pkgs.t t/cfg.t t/data/media.cfg +t/data/old-suggests-1-1.noarch.rpm t/data/rpm-buggy/invalid-signature.rpm t/data/rpm-buggy/not-a-rpm.rpm t/data/rpm-buggy/weird-header.rpm +t/data/rpm-v3/KBackup-1.2.11-1.src.rpm t/data/rpm-v3/libtermcap-2.0.8-2.i386.rpm t/data/rpm-v3/nls-1.0-2.i386.rpm 
t/data/rpm-v3/p2c-1.20-7.i386.rpm @@ -109,11 +111,16 @@ t/data/SPECS/arch_to_noarch_1.spec t/data/SPECS/arch_to_noarch_2.spec t/data/SPECS/arch_to_noarch_3.spec t/data/SPECS/arch_to_noarch_4.spec +t/data/SPECS/buildroot_BuildRoot.spec +t/data/SPECS/buildroot_default.spec +t/data/SPECS/buildroot_define.spec t/data/SPECS/file-conflicts/a.spec t/data/SPECS/file-conflicts/b.spec t/data/SPECS/file-conflicts/c.spec t/data/SPECS/file-conflicts/d.spec t/data/SPECS/file-conflicts/e.spec +t/data/SPECS/file-conflicts/fa.spec +t/data/SPECS/file-conflicts/fb.spec t/data/SPECS/file-conflicts/ga.spec t/data/SPECS/file-conflicts/gc.spec t/data/SPECS/file-conflicts/gc_.spec @@ -159,6 +166,7 @@ t/data/SPECS/README-urpmi/b_3.spec t/data/SPECS/README-urpmi/c.spec t/data/SPECS/README-urpmi/d.spec t/data/SPECS/README-urpmi/d_.spec +t/data/SPECS/rpm-query-in-scriptlet.spec t/data/SPECS/rpmnew/a-1.spec t/data/SPECS/rpmnew/a-2.spec t/data/SPECS/rpmnew/a-3.spec @@ -188,6 +196,11 @@ t/data/SPECS/split-transactions--promote-2/e.spec t/data/SPECS/split-transactions--promote-2/f.spec t/data/SPECS/split-transactions--promote-2/g.spec t/data/SPECS/split-transactions--promote-2/h.spec +t/data/SPECS/split-transactions--strict-require-1/a.spec +t/data/SPECS/split-transactions--strict-require-1/b.spec +t/data/SPECS/split-transactions--strict-require-1/c.spec +t/data/SPECS/split-transactions--strict-require-2/a.spec +t/data/SPECS/split-transactions--strict-require-2/c.spec t/data/SPECS/split-transactions--strict-require-and-obsolete-1/a.spec t/data/SPECS/split-transactions--strict-require-and-obsolete-1/bb.spec t/data/SPECS/split-transactions--strict-require-and-obsolete-1/c-1.spec @@ -205,6 +218,7 @@ t/data/SPECS/suggests/a-3.spec t/data/SPECS/suggests/b.spec t/data/SPECS/suggests/bb.spec t/data/SPECS/suggests/c.spec +t/data/SPECS/suggests/c2.spec t/data/SPECS/suggests/cc.spec t/data/SPECS/suggests/suggested_b.spec t/data/SPECS/suggests/suggested_c.spec @@ -230,11 +244,13 @@ t/superuser--prefer.t 
t/superuser--priority-upgrade.t t/superuser--provide-and-no-obsolete.t t/superuser--README-urpmi.t +t/superuser--rpm.t t/superuser--rpmnew.t t/superuser--specify-media.t t/superuser--split-transactions--conflict.t t/superuser--split-transactions--promote.t t/superuser--split-transactions--strict-require-and-obsolete.t +t/superuser--split-transactions--strict-require.t t/superuser--split-transactions.t t/superuser--srpm-bootstrapping.t t/superuser--suggests.t @@ -262,6 +278,8 @@ urpm/select.pm urpm/signature.pm urpm/sys.pm urpm/util.pm +urpm/xml_info.pm +urpm/xml_info_pkg.pm urpme urpmf urpmi diff --git a/NEWS b/NEWS index 1d8a1b1a..2e7902d8 100644 --- a/NEWS +++ b/NEWS @@ -1,25 +1,37 @@ +Version 5.0 - 11 January 2008, by Pascal "Pixel" Rigaux + +- urpmf, urpmq: + o use xml info instead of hdlist when possible + o "urpmq -l" is faster (3x) + o "urpmf -l" is slower (1.5x) + o "urpmf --sourcerpm" is much faster + o see "xml-info" option in urpmi.cfg(5) to see when those files are downloaded + o new require: perl module XML::LibXML +- urpmq: + o use rpm file instead of hdlist/xml-info when file is local + o use URPM::Package->changelogs (need perl-URPM 3.06) - urpmf o fix an *old* bug (since december 2002) making urpmf keeps parsed hdlist files in memory (was fixed for multitags, but not for simple tags) - all tools: - o drop hdlist support (will be replaced with xml media_info) o replace /var/lib/urpmi/MD5SUM with /var/lib/urpmi/MD5SUM.<media_name> 
(this will allow checking xml media_info is up-to-date even if we don't update it at the same time as synthesis is updated) - urpmi.update, urpmi.addmedia, urpmi.removemedia: + o drop hdlist support replaced with xml media_info + (this imply file-deps are correctly handled, see genhdlist2(1)) o drop option "-c" which used to clean /var/cache/urpmi/headers o enhancement: only parse updated synthesis - urpmi.addmedia o do check md5sum of downloaded synthesis (the check was only done on urpmi.update) + o new --xml-info option - urpmi: o do remove __db* on priority-upgrade (fix regression introduced in 4.10.15) o always prompt before doing a priority-upgrade transaction, even if there is only one priority upgrade package (since there will be more packages to install after restarting urpmi) -- urpmq: - o use URPM::Package->changelogs (need perl-URPM 3.06) Version 4.10.19 - 12 December 2007, by Pascal "Pixel" Rigaux diff --git a/pod/urpmi.addmedia.8.pod b/pod/urpmi.addmedia.8.pod index 07a7ae00..60753408 100644 --- a/pod/urpmi.addmedia.8.pod +++ b/pod/urpmi.addmedia.8.pod @@ -117,6 +117,12 @@ username and a password. Adds a media which will be taken into account by B<MandrivaUpdate> or by C<urpmi --update> when looking for updates. +=item B<--xml-info> + +Use the specific policy for downloading xml info files. +It must be one of: never, on-demand, update-only, always. +See urpmi.cfg(5) for more information. + =item B<--probe-synthesis> Use synthesis file. diff --git a/pod/urpmi.cfg.5.pod b/pod/urpmi.cfg.5.pod index 14a40874..62aaf3e3 100644 --- a/pod/urpmi.cfg.5.pod +++ b/pod/urpmi.cfg.5.pod @@ -29,6 +29,31 @@ Same as specifying B<--allow-force> for urpmi. Disabled by default. Same as specifying B<--allow-nodeps> for urpmi. Disabled by default. +=item B<xml-info> + +For remote media, specify when files.xml.lzma, changelog.xml.lzma and info.xml.lzma are downloaded: + +=item B<never> + +=item B<on-demand> + +(This is the default). + +The specific xml info file is downloaded when urpmq/urpmf/rpmdrake ask for it. 
+urpmi.update will remove outdated xml info file. + +nb: if urpmq/urpmf/rpmdrake is not run by root, the xml info file is downloaded into /tmp/.urpmi-<uid>/ + +=item B<update-only> + +urpmi.update will update xml info files already required at least once by urpmq/urpmf/rpmdrake. + +nb: with B<update-only>, urpmi.update will not update /tmp/.urpmi-<uid>/ xml info files + +=item B<always> + +all xml info files are downloaded when doing urpmi.addmedia and urpmi.update + =item B<no-suggests> Same as specifying B<--no-suggests> for urpmi. Disabled by default. diff --git a/t/01compile.t b/t/01compile.t index cb744cc1..f4966acf 100644 --- a/t/01compile.t +++ b/t/01compile.t @@ -2,7 +2,7 @@ use strict; use warnings; -use Test::More tests => 21; +use Test::More tests => 23; for my $module (glob("urpm/*.pm")) { $module =~ s,/,::,g; diff --git a/urpm.pm b/urpm.pm index 1e7c5c26..250a9204 100644 --- a/urpm.pm +++ b/urpm.pm @@ -12,7 +12,7 @@ use urpm::sys; use urpm::cfg; use urpm::md5sum; -our $VERSION = '4.10.19'; +our $VERSION = '5.0'; our @ISA = qw(URPM Exporter); our @EXPORT_OK = 'file_from_local_url'; @@ -24,12 +24,15 @@ sub shunt_ignorearch { eval q( sub URPM::Package::is_arch_compat { 1 } ); } +sub xml_info_policies() { qw(never on-demand update-only always) } + sub default_options { { 'split-level' => 1, 'split-length' => 8, 'verify-rpm' => 1, 'post-clean' => 1, + 'xml-info' => 'on-demand', }; } @@ -91,6 +94,23 @@ sub prefer_rooted { -e "$root$file" ? "$root$file" : $file; } +sub userdir_prefix { + my ($_urpm) = @_; + '/tmp/.urpmi-'; +} +sub userdir { + my ($urpm) = @_; + $< or return; + + my $dir = ($urpm->{urpmi_root} || '') . userdir_prefix($urpm) . $<; + mkdir $dir, 0755; # try to create it + + -d $dir && ! 
-l $dir or $urpm->{fatal}(N("fail to create directory %s", $dir)); + -o $dir && -w $dir or $urpm->{fatal}(N("invalid owner for directory %s", $dir)); + + $dir; +} + sub set_files { my ($urpm, $urpmi_root) = @_; my %h = ( diff --git a/urpm/args.pm b/urpm/args.pm index 585166f4..0181a544 100644 --- a/urpm/args.pm +++ b/urpm/args.pm @@ -303,6 +303,7 @@ my %options_spec = ( }, 'urpmi.addmedia' => { + 'xml-info=s' => \$options{'xml-info'}, 'no-probe' => sub { $options{probe_with} = undef }, distrib => sub { $options{distrib} = 1 }, interactive => sub { $options{interactive} = 1 }, diff --git a/urpm/cfg.pm b/urpm/cfg.pm index 68421910..01fbc9cd 100644 --- a/urpm/cfg.pm +++ b/urpm/cfg.pm @@ -149,6 +149,7 @@ sub load_config_raw { |removable |md5sum |limit-rate + |xml-info |excludepath |split-(?:level|length) |priority-upgrade diff --git a/urpm/media.pm b/urpm/media.pm index 7b570542..ad66b9ed 100644 --- a/urpm/media.pm +++ b/urpm/media.pm @@ -12,7 +12,6 @@ use MDV::Distribconf; our @PER_MEDIA_OPT = qw( downloader - hdlist ignore key-ids list @@ -29,8 +28,11 @@ our @PER_MEDIA_OPT = qw( virtual with_hdlist with_synthesis + xml-info ); +my @xml_media_info = ('info', 'files', 'changelog'); + sub get_medium_option { my ($urpm, $medium, $option_name) = @_; @@ -259,6 +261,14 @@ sub statedir_MD5SUM { my ($urpm, $medium) = @_; statedir_media_info_file($urpm, $medium, 'MD5SUM', ''); } +sub statedir_hdlist { + my ($urpm, $medium) = @_; + statedir_media_info_file($urpm, $medium, 'hdlist', '.cz'); +} +sub statedir_xml_info { + my ($urpm, $medium, $xml_info) = @_; + statedir_media_info_file($urpm, $medium, $xml_info, '.xml.lzma'); +} sub cachedir_with_synthesis { my ($urpm, $medium) = @_; _url_with_synthesis($medium) && "$urpm->{cachedir}/partial/" . 
_url_with_synthesis_basename($medium); @@ -269,6 +279,27 @@ sub any_synthesis { : statedir_synthesis($urpm, $medium); -e $f && $f; } +sub any_media_info_file { + my ($urpm, $medium, $prefix, $suffix, $quiet) = @_; + + if (my $base = file_from_file_url($medium->{url})) { + my $f = $medium->{with_synthesis} + ? reduce_pathname("$base/$prefix." . _synthesis_suffix($medium) . $suffix) + : _synthesis_dir($medium) . "/$prefix$suffix"; + + -e $f && $f; + } else { + _any_media_info__or_download($urpm, $medium, $prefix, $suffix, $quiet); + } +} +sub any_hdlist { + my ($urpm, $medium, $quiet) = @_; + any_media_info_file($urpm, $medium, 'hdlist', '.cz', $quiet); +} +sub any_xml_info { + my ($urpm, $medium, $xml_info, $quiet) = @_; + any_media_info_file($urpm, $medium, $xml_info, '.xml.lzma', $quiet); +} sub name2medium { my ($urpm, $name) = @_; @@ -276,6 +307,27 @@ sub name2medium { $medium; } +sub userdirs { + my ($urpm) = @_; + my $prefix = urpm::userdir_prefix($urpm); + grep { m!^\Q$prefix\E\d+$! && -d $_ && ! -l $_ } glob("$prefix*"); +} + +sub remove_user_media_info_files { + my ($urpm, $medium) = @_; + + foreach my $dir (userdirs($urpm)) { + require File::Glob; + # we can't use perl's glob() because $medium->{name} can contain spaces + my @files = map { File::Glob::bsd_glob("$dir/*.$medium->{name}.$_") } 'cz', 'xml.lzma' or next; + + $urpm->{log}("cleaning $dir"); + foreach (@files) { + unlink $_ or $urpm->{error}("removing $_ failed"); + } + } +} + #- probe device associated with a removable device. 
sub probe_removable_device { my ($urpm, $medium) = @_; @@ -357,8 +409,7 @@ sub _tempignore { #- nocheck_access (used by read_config) #- #- callback (urpmf) -#- need_xml (for urpmf: to be able to have info not available in synthesis) -#- nodepslist (for urpmq: we don't need the synthesis) +#- nodepslist (for urpmq, urpmf: when we don't need the synthesis) #- no_skiplist (urpmf) #- #- synthesis (use this synthesis file, and only this synthesis file) @@ -456,14 +507,9 @@ sub _parse_media { my ($urpm, $options) = @_; foreach (grep { !$_->{ignore} && (!$options->{update} || $_->{update}) } @{$urpm->{media} || []}) { - our $currentmedia = $_; #- hack for urpmf delete @$_{qw(start end)}; _parse_synthesis_or_ignore($urpm, $_, $options->{callback}); - if ($options->{need_xml}) { - # TODO - # _parse_xml_($urpm, $_, any_xml($urpm, $_), $options->{callback}); - } if ($_->{searchmedia}) { $urpm->{searchmedia} = 1; $urpm->{log}(N("Search start: %s end: %s", $_->{start}, $_->{end})); @@ -506,7 +552,7 @@ sub _compute_flags_for_instlist { #- add a new medium, sync the config file accordingly. #- returns the new medium's name. (might be different from the requested #- name if index_name was specified) -#- options: ignore, index_name, nolock, update, virtual, media_info_dir +#- options: ignore, index_name, nolock, update, virtual, media_info_dir, xml-info sub add_medium { my ($urpm, $name, $url, $with_synthesis, %options) = @_; @@ -532,7 +578,7 @@ sub add_medium { url => $url, modified => !$options{ignore}, }; - foreach (qw(downloader update ignore media_info_dir)) { + foreach (qw(downloader update ignore media_info_dir xml-info)) { $medium->{$_} = $options{$_} if exists $options{$_}; } @@ -729,7 +775,9 @@ sub remove_media { $urpm->{modified} = 1; #- remove files associated with this medium. 
- unlink grep { $_ } map { $_->($urpm, $medium) } \&statedir_synthesis, \&statedir_descriptions, \&statedir_names, \&statedir_MD5SUM; + unlink grep { $_ } map { $_->($urpm, $medium) } \&statedir_synthesis, \&statedir_descriptions, \&statedir_names, \&statedir_MD5SUM, \&statedir_hdlist; + unlink statedir_xml_info($urpm, $medium, $_) foreach @xml_media_info; + remove_user_media_info_files($urpm, $medium); #- remove proxy settings for this media urpm::download::remove_proxy_media($medium->{name}); @@ -941,23 +989,26 @@ sub _download_list_or_pubkey { } sub _download_media_info_file { - my ($urpm, $medium, $prefix, $suffix, $quiet) = @_; + my ($urpm, $medium, $prefix, $suffix, $quiet, $o_download_dir) = @_; + my $download_dir = $o_download_dir || "$urpm->{cachedir}/partial"; my $name = "$prefix$suffix"; + my $result_file = "$download_dir/$name"; my $found; if (_synthesis_suffix($medium)) { my $local_name = $prefix . _synthesis_suffix($medium) . $suffix; if (urpm::download::sync($urpm, $medium, [_synthesis_dir($medium) . "/$local_name"], - quiet => $quiet)) { - rename("$urpm->{cachedir}/partial/$local_name", "$urpm->{cachedir}/partial/$name"); + dir => $download_dir, quiet => $quiet)) { + rename("$download_dir/$local_name", $result_file); $found = 1; } } if (!$found) { - urpm::download::sync($urpm, $medium, [_synthesis_dir($medium) . "/$name"], quiet => 1) - or unlink "$urpm->{cachedir}/partial/$name"; + urpm::download::sync($urpm, $medium, [_synthesis_dir($medium) . "/$name"], dir => $download_dir, quiet => 1) + or unlink $result_file; } + -s $result_file && $result_file; } sub get_descriptions_local { @@ -1340,6 +1391,14 @@ sub _update_medium_ { statedir_MD5SUM($urpm, $medium)) if -e "$urpm->{cachedir}/partial/MD5SUM"; } + # we never download hdlist by default. 
urpmf will download it via any_hdlist() if really needed + unlink statedir_hdlist($urpm, $medium); + + remove_user_media_info_files($urpm, $medium); + + if (!file_from_file_url($medium->{url})) { + _retrieve_xml_media_info_or_remove($urpm, $medium, $options{quiet}) or return; + } } $medium->{modified} = 0; @@ -1430,6 +1489,73 @@ sub update_media { $updates_result{error} == 0; } +sub _retrieve_xml_media_info_or_remove { + my ($urpm, $medium, $quiet) = @_; + + my $ok = 1; + + foreach my $xml_info (@xml_media_info) { + my $f = statedir_xml_info($urpm, $medium, $xml_info); + + if ($medium->{removable} || + get_medium_option($urpm, $medium, 'xml-info') eq 'always' || + get_medium_option($urpm, $medium, 'xml-info') eq 'update-only' && -e $f) { + $ok &&= _retrieve_media_info_file_and_check_MD5SUM($urpm, $medium, $xml_info, '.xml.lzma', $quiet); + } else { + #- "on-demand" + unlink $f; + } + } + $ok; +} + +sub _retrieve_media_info_file_and_check_MD5SUM { + my ($urpm, $medium, $prefix, $suffix, $quiet) = @_; + + my $name = "$prefix$suffix"; + my $cachedir_file = + file_from_local_url($medium->{url}) ? + _copy_media_info_file($urpm, $medium, $prefix, $suffix) : + _download_media_info_file($urpm, $medium, $prefix, $suffix, $quiet) or + $urpm->{error}(N("retrieval of [%s] failed", _synthesis_dir($medium) . "/$name")), return; + + my $wanted_md5sum = urpm::md5sum::from_MD5SUM__or_warn($urpm, statedir_MD5SUM($urpm, $medium), $name); + if ($wanted_md5sum) { + $urpm->{debug}("computing md5sum of retrieved $name") if $urpm->{debug}; + urpm::md5sum::compute($cachedir_file) eq $wanted_md5sum or + $urpm->{error}(N("retrieval of [%s] failed (md5sum mismatch)", _synthesis_dir($medium) . 
"/$name")), return; + + urpm::util::move($cachedir_file, statedir_media_info_file($urpm, $medium, $prefix, $suffix)) or return; + } + 1; +} + +sub _any_media_info__or_download { + my ($urpm, $medium, $prefix, $suffix, $quiet) = @_; + + my $name = "$prefix.$medium->{name}$suffix"; + my $f = "$urpm->{statedir}/$name"; + -s $f and return $f; + + my $download_dir; + if (my $userdir = urpm::userdir($urpm)) { + $f = "$userdir/$name"; + -s $f and return $f; + + $download_dir = "$userdir/partial"; + mkdir $download_dir; + } + + get_medium_option($urpm, $medium, 'xml-info') ne 'never' or return; + + my $file_in_partial = + _download_media_info_file($urpm, $medium, $prefix, $suffix, $quiet, $download_dir) or return; + + urpm::util::move($file_in_partial, $f) or return; + + $f; +} + #- clean params and depslist computation zone. sub clean { my ($urpm) = @_; diff --git a/urpm/xml_info.pm b/urpm/xml_info.pm new file mode 100644 index 00000000..4148940f --- /dev/null +++ b/urpm/xml_info.pm @@ -0,0 +1,168 @@ +package urpm::xml_info; + +use XML::LibXML::Reader; + +# throw an exception on error +sub get_nodes { + my ($xml_info, $xml_info_file, $fullnames) = @_; + + my $get_one_node = _get_one_node($xml_info); + _get_xml_info_nodes($xml_info_file, $get_one_node, $fullnames); +} + +# throw an exception on error +sub do_something_with_nodes { + my ($xml_info, $xml_info_file, $do, $o_wanted_attributes) = @_; + + my $get_one_node = _get_one_node($xml_info, $o_wanted_attributes); + _do_something_with_xml_info_nodes($xml_info_file, $get_one_node, $do); +} + + +################################################################################ +sub _open_xml_reader { + my ($xml_info_file) = @_; + + open(my $F, "lzma -dc $xml_info_file |"); + my $reader = new XML::LibXML::Reader(IO => $F) or die "cannot read $xml_info_file\n"; + + $reader->read; + $reader->name eq 'media_info' or die "global tag not found\n"; + + $reader->read; # first tag + + $reader; +} + +sub _get_all_attributes { + my 
($reader) = @_; + my %entry; + + $reader->moveToFirstAttribute; + + do { + $entry{$reader->name} = $reader->value; + } while $reader->moveToNextAttribute == 1; + + \%entry; +} + +sub _get_attributes { + my ($reader, $o_wanted_attributes) = @_; + + if ($o_wanted_attributes) { + my %entry = map { $_ => $reader->getAttribute($_) } @$o_wanted_attributes; + \%entry; + } else { + _get_all_attributes($reader); + } +} + +sub _get_simple_value_node { + my ($value_name, $o_wanted_attributes) = @_; + + sub { + my ($reader) = @_; + my $entry = _get_attributes($reader, $o_wanted_attributes); + + $reader->read; # get value + $entry->{$value_name} = $reader->value; + $entry->{$value_name} =~ s/^\n//; + + $reader->read; # close tag + $reader->read; # open next tag + + $entry; + }; +} + +sub _get_changelog_node { + my ($reader, $fn) = @_; + + $reader->nextElement('log'); # get first + + my @changelogs; + my $time; + while ($time = $reader->getAttribute('time')) { + push @changelogs, my $e = { time => $time }; + + $reader->nextElement('log_name'); $reader->read; + $e->{name} = $reader->value; + + $reader->nextElement('log_text'); $reader->read; + $e->{text} = $reader->value; + + $reader->read; # + $reader->read; # + $reader->read; # + $reader->read if $reader->readState != 0; # there may be SIGNIFICANT_WHITESPACE between and + } + + { fn => $fn, changelogs => \@changelogs }; +} + +sub _get_one_node { + my ($xml_info, $o_wanted_attributes) = @_; + + if ($xml_info eq 'changelog') { + \&_get_changelog_node; + } elsif ($xml_info eq 'info') { + _get_simple_value_node('description', $o_wanted_attributes); + } else { + _get_simple_value_node('files', $o_wanted_attributes); + } +} + +sub _get_xml_info_nodes { + my ($xml_info_file, $get_node, $fullnames) = @_; + + my $fullnames_re = '^(' . join('|', map { quotemeta $_ } @$fullnames) . 
')$'; + + my %todo = map { $_ => 1 } @$fullnames; + my %nodes; + _iterate_on_nodes($xml_info_file, + sub { + my ($reader, $fn) = @_; + if ($fn =~ /$fullnames_re/) { + $nodes{$fn} = $get_node->($reader); + delete $todo{$fn}; + %todo == (); + } else { + $reader->next; + 0; + } + }); + + %todo and die "could not find " . join(', ', keys %todo) . " in $xml_info_file\n"; + + %nodes; +} + +sub _do_something_with_xml_info_nodes { + my ($xml_info_file, $get_node, $do) = @_; + + _iterate_on_nodes($xml_info_file, + sub { + my ($reader, $fn) = @_; + my $h = $get_node->($reader, $fn); # will read until closing tag + $do->($h); + 0; + }); +} + +sub _iterate_on_nodes { + my ($xml_info_file, $do) = @_; + + my $reader = _open_xml_reader($xml_info_file); + + my $fn; + while ($fn = $reader->getAttribute('fn')) { + $do->($reader, $fn) and return; # $do must go to next node otherwise it loops! + } + + $reader->readState == 3 || $reader->name eq 'media_info' + or die qq(missing attribute "fn" in tag ") . $reader->name . qq("\n); +} + +1; + diff --git a/urpm/xml_info_pkg.pm b/urpm/xml_info_pkg.pm new file mode 100644 index 00000000..ce0fb0cd --- /dev/null +++ b/urpm/xml_info_pkg.pm @@ -0,0 +1,46 @@ +package urpm::xml_info_pkg; + +# proxy object: returns the xml info if available, otherwise redirects to URPM::Package + +sub new { + my ($class, $hash, $pkg) = @_; + + $pkg and $hash->{pkg} = $pkg; + + bless $hash, $class; +} + + +# only available in synthesis/hdlist +sub id { $_[0]{pkg}->id } +sub group { $_[0]{pkg}->group } +sub size { $_[0]{pkg}->size } +sub epoch { $_[0]{pkg}->epoch } +sub buildhost { $_[0]{pkg}->buildhost } +sub packager { $_[0]{pkg}->packager } +sub summary { $_[0]{pkg}->summary } + + +# can be directly available in xml_info +sub url { exists $_[0]{url} ? $_[0]{url} : $_[0]{pkg}->url } +sub licence { exists $_[0]{licence} ? $_[0]{licence} : $_[0]{pkg}->licence } +sub sourcerpm { exists $_[0]{sourcerpm} ? 
$_[0]{sourcerpm} : $_[0]{pkg}->sourcerpm } +sub description { exists $_[0]{description} ? $_[0]{description} : $_[0]{pkg}->description } + +sub changelogs { exists $_[0]{changelogs} ? @{$_[0]{changelogs}} : $_[0]{pkg}->changelogs } + +sub files { exists $_[0]{files} ? split("\n", $_[0]{files}) : $_[0]{pkg}->files } + +my $fullname_re = qr/^(.*)-([^\-]*)-([^\-]*)\.([^\.\-]*)$/; + +# available in both {pkg} and {fn} +sub name { exists $_[0]{pkg} ? $_[0]{pkg}->name : $_[0]{fn} =~ $fullname_re && $1 } +sub version { exists $_[0]{pkg} ? $_[0]{pkg}->version : $_[0]{fn} =~ $fullname_re && $2 } +sub release { exists $_[0]{pkg} ? $_[0]{pkg}->release : $_[0]{fn} =~ $fullname_re && $3 } +sub arch { exists $_[0]{pkg} ? $_[0]{pkg}->arch : $_[0]{fn} =~ $fullname_re && $4 } + +sub fullname { wantarray ? $_[0]{pkg}->fullname : $_[0]{fn} } +sub filename { $_[0]{fn} . '.rpm' } + + +1; diff --git a/urpmf b/urpmf index 3893f7d5..68f5581e 100755 --- a/urpmf +++ b/urpmf @@ -88,12 +88,17 @@ usage: urpmf [options] pattern-expression } my %tags_per_media_info = ( - - synthesis => [ qw( + everywhere => [ qw( arch - conflicts epoch filename + name + release + version + ) ], + + synthesis => [ qw( + conflicts group obsoletes provides @@ -103,17 +108,23 @@ my %tags_per_media_info = ( summary ) ], + xml_info__info => [ qw( + description + license + sourcerpm + url + ) ], + + xml_info__files => [ qw( + files + ) ], + hdlist => [ qw( buildhost buildtime conf_files - description distribution - files - license packager - sourcerpm - url vendor ) ], ); @@ -165,7 +176,7 @@ my %usedtags; my $sprintfargs = join(', ', map { $usedtags{$_} = 1; if ($_ eq 'media') { - '$urpm::media::currentmedia->{name}'; + '$medium->{name}'; } elsif ($_ eq 'fullname') { 'scalar($pkg->fullname)'; } elsif ($_ eq 'description') { @@ -213,6 +224,7 @@ my $callback = join("\n", "}"); $urpm->{debug}("qf:[$qf]\ncallback:\n$callback") if $urpm->{debug}; +our $medium; $callback = eval $callback; if ($@) { warn "Internal error: 
$@\n"; @@ -229,7 +241,19 @@ if ($env) { } my $_lock = urpm::lock::urpmi_db($urpm, '', nofatal => 1, wait => $options{wait_lock}); -my $need_xml = grep { $usedtags{$_} } @{$tags_per_media_info{hdlist}}; + +my %needed_media_info = map { $_ => 1 } grep { + my $l = $tags_per_media_info{$_}; + int(grep { $usedtags{$_} } @$l); +} keys %tags_per_media_info; + +my @needed_xml_info = map { s/xml_info__// ? $_ : () } keys %needed_media_info; +if (@needed_xml_info > 1) { + # we don't handle parallel parsing of xml files, default to hdlist + $needed_media_info{hdlist} = 1; +} + +my %fullname2pkg; urpm::media::configure($urpm, nocheck_access => 1, no_skiplist => 1, @@ -239,10 +263,51 @@ urpm::media::configure($urpm, synthesis => $synthesis, usedistrib => $urpm::args::options{usedistrib}, update => $update, - callback => $callback, - need_xml => $need_xml, + @needed_xml_info && $needed_media_info{synthesis} && !$needed_media_info{hdlist} ? + # in that case, we need to have both synthesis and xml_info + (callback => sub { + my ($_urpm, $pkg) = @_; + $fullname2pkg{$pkg->fullname} = $pkg; + 1; + }) : (nodepslist => 1) ); -if ($need_xml) { - # TODO +# nb: we don't "my" $medium since it is used for $callback +if ($needed_media_info{hdlist}) { + foreach $medium (grep { !$_->{ignore} } @{$urpm->{media}}) { + my $hdlist = urpm::media::any_hdlist($urpm, $medium, $options{verbose} < 0) or + $urpm->{error}(N("no hdlist available for medium \"%s\"", $medium->{name})), next; + $urpm->{log}("getting information from $hdlist"); + $urpm->parse_hdlist($hdlist, callback => $callback); + } +} elsif (!@needed_xml_info) { + foreach $medium (grep { !$_->{ignore} } @{$urpm->{media}}) { + my $synthesis = urpm::media::any_synthesis($urpm, $medium) or + $urpm->{error}(N("no synthesis available for medium \"%s\"", $medium->{name})), next; + $urpm->{log}("getting information from $synthesis"); + $urpm->parse_synthesis($synthesis, callback => $callback); + } +} elsif (my ($xml_info) = @needed_xml_info) { + 
foreach $medium (grep { !$_->{ignore} } @{$urpm->{media}}) { + my $xml_info_file = urpm::media::any_xml_info($urpm, $medium, $xml_info, $options{verbose} < 0) or + $urpm->{error}(N("no xml-info available for medium \"%s\"", $medium->{name})), next; + require urpm::xml_info; + require urpm::xml_info_pkg; + + my $cooked_callback = $needed_media_info{synthesis} ? + sub { + my ($node) = @_; + my $pkg = $fullname2pkg{$node->{fn}} or warn "can't find $node->{fn} in synthesis\n"; + $pkg and $callback->($urpm, urpm::xml_info_pkg->new($node, $pkg)); + } : sub { + my ($node) = @_; + $callback->($urpm, urpm::xml_info_pkg->new($node, undef)); + }; + $urpm->{log}("getting information from $xml_info_file"); + urpm::xml_info::do_something_with_nodes( + $xml_info, + $xml_info_file, + $cooked_callback, + ); + } } diff --git a/urpmi.addmedia b/urpmi.addmedia index ba131e6b..897eb1ac 100755 --- a/urpmi.addmedia +++ b/urpmi.addmedia @@ -26,6 +26,7 @@ use urpm::msg; use urpm::download (); use urpm::cfg; use urpm::media; +use urpm::util 'member'; sub usage { my $m = shift; @@ -51,6 +52,8 @@ and [options] are from ") . N(" --proxy-user - specify user and password to use for proxy authentication (format is ). ") . N(" --update - create an update medium. +") . N(" --xml-info - use the specific policy for downloading xml info files + one of: never, on-demand, update-only, always. cf urpmi.cfg(5) ") . N(" --probe-synthesis - use synthesis file. ") . N(" --probe-rpms - use rpm files (instead of synthesis). ") . N(" --no-probe - do not try to find any synthesis file. @@ -86,6 +89,11 @@ if ($options{mirrors_url}) { } $options{force} = 0; my $urpm = urpm->new_parse_cmdline or usage(); + +if ($options{'xml-info'}) { + member($options{'xml-info'}, urpm::xml_info_policies()) or die N("known xml-info policies are %s", join(', ', urpm::xml_info_policies())) . 
"\n"; +} + our ($name, $url, $with, $relative_synthesis) = our @cmdline; $options{quiet} = 1 if $options{verbose} < 0; @@ -130,6 +138,7 @@ if ($options{distrib}) { probe_with => $options{probe_with}, nolock => $options{nolock}, ask_media => $add_media_callback, + 'xml-info' => $options{'xml-info'}, ); $ok = urpm::media::update_media($urpm, %options, quiet => $options{verbose} < 0, @@ -150,6 +159,7 @@ if ($options{distrib}) { update => $options{update}, ignore => $options{raw}, nolock => $options{nolock}, + 'xml-info' => $options{'xml-info'}, ); urpm::download::copy_cmd_line_proxy($name); if ($options{raw}) { diff --git a/urpmq b/urpmq index 345548c4..d6f4e52d 100755 --- a/urpmq +++ b/urpmq @@ -318,12 +318,11 @@ if ($options{list_aliases}) { || $options{changelog}) { my ($local_sources, $list) = urpm::get_pkgs::selected2list($urpm, $state->{selected}); + my %xml_info_pkgs; if ($options{info} || $options{files} || $options{changelog}) { # get descriptions of update sources my $updates_descr = urpm::get_updates_description($urpm); - # if not root, use a temporary directory to store headers - my $tmp_header_dir = $< != 0 ? 
urpm::sys::mktempdir() : "$urpm->{cachedir}/headers"; my @selected = uniq(map { keys %$_ } @$list); @@ -331,29 +330,29 @@ if ($options{list_aliases}) { my @l = grep { $medium->{start} <= $_ && $_ <= $medium->{end} } @selected or next; my @pkgs = map { $urpm->{depslist}[$_] } @l or next; - # TODO - my $hdlist_path = urpm::media::any_hdlist($urpm, $medium); - if (-s $hdlist_path) { - require MDV::Packdrakeng; - my $packer = MDV::Packdrakeng->open(archive => $hdlist_path, quiet => 1); - defined $packer or do { - warn "Can't open archive: $MDV::Packdrakeng::error"; - exit 1; - }; - my @headers = map { $_->header_filename } @pkgs or next; - $packer->extract($tmp_header_dir, @headers); - } elsif (my $dir = file_from_local_url($medium->{url})) { + if (my $dir = file_from_local_url($medium->{url})) { + $urpm->{log}("getting information from rpms from $dir"); $local_sources->{$_->id} = "$dir/" . $_->filename foreach @pkgs; } else { - my $pkgs_text = join(' ', map { $_->name } @pkgs); - if ($options{info}) { - $urpm->{info}((int(@pkgs) == 1) ? - N("no hdlist for medium \"%s\", only partial result for package %s", $medium->{name}, $pkgs_text) - : N("no hdlist for medium \"%s\", only partial result for packages %s", $medium->{name}, $pkgs_text)); - } else { - $urpm->{error}((int(@pkgs) == 1) ? 
- N("no hdlist for medium \"%s\", unable to return any result for package %s",$medium->{name}, $pkgs_text) - : N("no hdlist for medium \"%s\", unable to return any result for packages %s", $medium->{name}, $pkgs_text)); + foreach my $xml_info (grep { $options{$_} } 'info', 'files', 'changelog') { + if (my $xml_info_file = urpm::media::any_xml_info($urpm, $medium, $xml_info, $options{verbose} < 0)) { + require urpm::xml_info; + require urpm::xml_info_pkg; + $urpm->{log}("getting information from $xml_info_file"); + my %nodes = urpm::xml_info::get_nodes($xml_info, $xml_info_file, [ map { scalar $_->fullname } @pkgs ]); + put_in_hash($xml_info_pkgs{$_->id} ||= {}, $nodes{$_->fullname}) foreach @pkgs; + } else { + my $pkgs_text = join(' ', map { $_->name } @pkgs); + if ($xml_info eq 'info') { + $urpm->{info}((int(@pkgs) == 1) ? + N("no xml info for medium \"%s\", only partial result for package %s", $medium->{name}, $pkgs_text) + : N("no xml info for medium \"%s\", only partial result for packages %s", $medium->{name}, $pkgs_text)); + } else { + $urpm->{error}((int(@pkgs) == 1) ? + N("no xml info for medium \"%s\", unable to return any result for package %s",$medium->{name}, $pkgs_text) + : N("no xml info for medium \"%s\", unable to return any result for packages %s", $medium->{name}, $pkgs_text)); + } + } } } } @@ -362,11 +361,12 @@ if ($options{list_aliases}) { my $pkg = $urpm->{depslist}[$id] or next; #- even if non-root, search for a header in the global cachedir - my $file1 = $local_sources->{$id} || "$urpm->{cachedir}/headers/" . $pkg->header_filename; - my $file2 = "$tmp_header_dir/" . $pkg->header_filename; - if (my ($file) = grep { -s $_ } $file1, $file2) { + my $file = $local_sources->{$id} || "$urpm->{cachedir}/headers/" . 
$pkg->header_filename; + if (-s $file) { $pkg->update_header($file, keep_all_tags => 1); - $file eq $file2 and unlink $file; + } elsif ($xml_info_pkgs{$id}) { + # using the proxy urpm::xml_info_pkg object + $pkg = urpm::xml_info_pkg->new($xml_info_pkgs{$id}, $pkg); } if ($options{info}) { printf "%-12s: %s\n", "Name", $pkg->name; @@ -416,7 +416,6 @@ if ($options{list_aliases}) { } } } - -d $tmp_header_dir && $< != 0 and rmdir $tmp_header_dir; } elsif ($options{sources}) { print join "\n", values %$local_sources; values %$local_sources and print "\n"; foreach (0..$#{$urpm->{media} || []}) { -- cgit v1.2.1