about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Maarten Vanraes <alien@mageia.org> 2012-02-28 02:37:24 +0000
committer: Maarten Vanraes <alien@mageia.org> 2012-02-28 02:37:24 +0000
commit: 0b89aede6f81df6927f75988939361fe88fc97f1 (patch)
tree: f769ea9cc96bc9687ca1021dac7142da6b17f0c8
parent: ef020d58538abb702b8181be990dba6424019b89 (diff)
download: urpmi-proxy-0b89aede6f81df6927f75988939361fe88fc97f1.tar
urpmi-proxy-0b89aede6f81df6927f75988939361fe88fc97f1.tar.gz
urpmi-proxy-0b89aede6f81df6927f75988939361fe88fc97f1.tar.bz2
urpmi-proxy-0b89aede6f81df6927f75988939361fe88fc97f1.tar.xz
urpmi-proxy-0b89aede6f81df6927f75988939361fe88fc97f1.zip
- update config file (0.3.1)
- improve logging
- implement IF_MODIFIED_SINCE
- use lstat when needed
- implement Last-Modified header usage
- more debug logging
- clean up for perl_checker
- bump version due to log file changes
-rw-r--r-- TODO 2
-rw-r--r-- VERSION 2
-rwxr-xr-x urpmi-proxy.cgi 135
-rw-r--r-- urpmi-proxy.conf 4
4 files changed, 95 insertions, 48 deletions
diff --git a/TODO b/TODO
index c4ae1a4..9074350 100644
--- a/TODO
+++ b/TODO
@@ -1,8 +1,8 @@
urpmi-proxy TODO:
-----------------
- - fix sending 304 not modified when requested!
- check unmodified for FTP
- fix chosen mirrorlist better
+ - fix non-mirrorlist urpmi
- fix rsync
- delete old rpm versions (or another way to keep cache small)
- better config system
diff --git a/VERSION b/VERSION
index abd4105..9e11b32 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.2.4
+0.3.1
diff --git a/urpmi-proxy.cgi b/urpmi-proxy.cgi
index 12475a0..78b4865 100755
--- a/urpmi-proxy.cgi
+++ b/urpmi-proxy.cgi
@@ -1,5 +1,5 @@
#!/usr/bin/perl -w
-## written by Maarten Vanraes (c) 2009-2011
+## written by Maarten Vanraes (c) 2009-2012
## urpmi-proxy is GPLv2+
use strict;
@@ -12,7 +12,7 @@ my $config_file = '/etc/urpmi-proxy.conf';
my $cache_tmp_path = '/var/tmp/urpmi-proxy';
my $cache_path = '/var/cache/urpmi-proxy';
my $logfile = '/var/log/urpmi-proxy.log';
-my $check_updates_only_files = 'MD5SUM';
+my $check_updates_only_files = '(MD5SUM|descriptions)';
my $check_no_updates_files;
my $merge_files = 'media.cfg';
my $sources = [
@@ -28,8 +28,8 @@ if (-R $config_file) {
my $r = open(FILE, '<', $config_file);
if ($r) {
my $l = '';
- while (<FILE>) {
- $l .= $_;
+ while (my $i = <FILE>) {
+ $l .= $i;
}
eval $l;
close FILE;
@@ -37,10 +37,9 @@ if (-R $config_file) {
}
print STDERR "logfile: $logfile\n" if $debug;
-my $sou;
print STDERR "orig sources: " . scalar(@$sources) . "\n" if $debug;
if ($debug) {
- foreach $sou (@$sources) {
+ foreach my $sou (@$sources) {
print STDERR " - " . $sou . "\n";
}
}
@@ -63,6 +62,19 @@ print STDERR "file: $file\n" if $debug;
print STDERR "dest_path: $dest_path\n" if $debug;
print STDERR "filename: $filename\n" if $debug;
+# check if there's a time condition
+my $modified_since = 0;
+if (defined $ENV{HTTP_IF_MODIFIED_SINCE}) {
+ $modified_since = `date --date="$ENV{HTTP_IF_MODIFIED_SINCE}" "+%s"` if defined $ENV{HTTP_IF_MODIFIED_SINCE};
+ $modified_since =~ s/[\s\r\n]*$//;
+ print STDERR "is modified since: $ENV{HTTP_IF_MODIFIED_SINCE} ($modified_since) ?\n" if $debug && $modified_since;
+}
+
+# set request ENV vars
+my $ip = $ENV{REMOTE_ADDR};
+my $user_agent = '';
+$user_agent = $ENV{HTTP_USER_AGENT} if defined $ENV{HTTP_USER_AGENT};
+
# check if request needs update checking
my $check_file = 1;
$check_file = 0 if defined $check_no_updates_files && $filename =~ m/$check_no_updates_files/;
@@ -78,22 +90,18 @@ $check_file = 1 if $merge;
print STDERR "check_file: $check_file\n" if $debug;
print STDERR "merge: $merge\n" if $debug;
-# if the file needs no update checks, check in cache
-return_file($cache_path, $file, $logfile, 'CACHED_NO_CHECK') if (!$check_file && $filename && -R $cache_path . $file);
-
# get datetime from local file if it exists
-my $timestamp = 0;
-if ($filename && !$check_file && -R $cache_path . $file) {
- my @sv = lstat($cache_path . $file);
- $timestamp = $sv[9];
- print STDERR "timestamp: $timestamp\n" if $debug;
+my @stat = lstat($cache_path . $file);
+if ($filename && scalar(@stat) > 0) {
+ print STDERR "timestamp: $stat[9]\n" if $debug;
+ # if the file needs no update checks, check in cache
+ return_file($cache_path, $file, $logfile, 'HIT_NO_CHECK', \@stat, $ip, $user_agent) if !$check_file;
}
# set up curl with timecheck
my $curl;
my $r = 0;
my $file_sent = 0;
-#my $file_deleted = 0;
my $file_unmodified = 0;
my $file_time = -1;
my $err = 200;
@@ -101,7 +109,7 @@ my $err = 200;
# prepare curl transfer
my $tmp_file = $cache_tmp_path . "/" . rand() . $$;
open(FILEHANDLE, ">", $tmp_file) or do {
- _log($logfile, $file, 500, 'MISS');
+ _log($logfile, $file, 500, 'MISS', -1, $ip, $user_agent);
return_error(500, 'Server error');
};
binmode(FILEHANDLE);
@@ -109,7 +117,7 @@ my %curldata = (fh => \*FILEHANDLE, file_sent => \$file_sent, content_type => $f
print STDERR "sources: " . scalar(@$sources) . "\n" if $debug;
if ($debug) {
- foreach $sou (@$sources) {
+ foreach my $sou (@$sources) {
print STDERR " - " . $sou . "\n";
}
}
@@ -150,7 +158,7 @@ foreach my $s (@$sources) {
# check for source
print STDERR "interpolated sources: " . scalar(@sources) . "\n" if $debug;
if ($debug) {
- foreach $sou (@sources) {
+ foreach my $sou (@sources) {
print STDERR " - " . $sou . "\n";
}
}
@@ -163,8 +171,8 @@ foreach my $source (@sources) {
my $res = open(FILE, '<', '/var/cache/urpmi/mirrors.cache');
if ($res) {
my $mirrorcache = '';
- while (<FILE>) {
- $mirrorcache .= $_;
+ while (my $i = <FILE>) {
+ $mirrorcache .= $i;
}
close FILE;
my $host_loc = $loc;
@@ -198,8 +206,8 @@ foreach my $source (@sources) {
my $res = open(FILE, '<', '/var/cache/urpmi/mirrors.cache');
if ($res) {
my $mirrorcache = '';
- while (<FILE>) {
- $mirrorcache .= $_;
+ while (my $i = <FILE>) {
+ $mirrorcache .= $i;
}
close FILE;
my $loc_host = $loc;
@@ -229,20 +237,29 @@ foreach my $source (@sources) {
}
}
if ($type eq 'file') {
- if ($filename && -R $loc . $file) {
+ my @statl = lstat($loc . $file);
+ if ($filename && scalar(@statl) > 0) {
my $ft = `file -b --mime-type $loc$file`;
$ft =~ s/[\s\r\n]*$//;
- print STDERR "mimetype: '$ft'\n" if $debug;
- print STDERR "size: " . (-s $loc . $file) . "\n" if $debug;
+ my $t = localtime($statl[9]);
+ print STDERR "HTTP Header: 200 OK\n" if $debug;
+ print STDERR "Content-Type: $ft\n" if $debug;
+ print STDERR "Content-Length: " . $statl[7] . "\n" if $debug;
+ print STDERR "Last-Modified: " . $t . "\n" if $debug;
$r = open(FILE, "<", $loc . $file);
if ($r) {
print STDERR "file fetch url '" . $loc . $file . "'\n" if $debug;
if (!$file_sent) {
- $file_sent = 1;
+ $file_sent = $statl[7];
+ print "Status: 200 OK\r\n";
print "Content-Type: " . $ft . "\r\n";
- print "Content-Length: " . (-s $loc . $file) . "\r\n" if !$merge;
+ print "Content-Length: " . $statl[7] . "\r\n" if !$merge;
+ print "Last-Modified: " . $t . "\r\n";
print "\r\n";
}
+ else {
+ $file_sent += $statl[7];
+ }
binmode(FILE);
my $buf;
while (read(FILE, $buf, 1024)) {
@@ -267,10 +284,15 @@ foreach my $source (@sources) {
use WWW::Curl::Easy;
# set up curl stuff
$curl = new WWW::Curl::Easy;
- if ($timestamp > 0) {
+ if (scalar(@stat) > 0 && $stat[9] > $modified_since) {
+ $curl->setopt(CURLOPT_TIMECONDITION, 1); # CURL_TIMECOND_IFMODSINCE
+ $curl->setopt(CURLOPT_TIMEVALUE, $stat[9]);
+ }
+ elsif ($modified_since > 0) {
$curl->setopt(CURLOPT_TIMECONDITION, 1); # CURL_TIMECOND_IFMODSINCE
- $curl->setopt(CURLOPT_TIMEVALUE, $timestamp);
+ $curl->setopt(CURLOPT_TIMEVALUE, $modified_since);
}
+ $curl->setopt(CURLOPT_USERAGENT, $user_agent) if $user_agent;
$curl->setopt(CURLOPT_CONNECTTIMEOUT, $connect_timeout);
$curl->setopt(CURLOPT_FTP_RESPONSE_TIMEOUT, $ftp_response_timeout);
$curl->setopt(CURLOPT_LOW_SPEED_LIMIT, $max_stall_speed);
@@ -294,6 +316,7 @@ foreach my $source (@sources) {
if ($err =~ m/^2/ || $err == 304) {
if ($curl->getinfo(CURLINFO_CONDITION_UNMET)) {
$file_unmodified = 1;
+ print STDERR "condition unmet\n" if $debug;
}
$file_time = $curl->getinfo(CURLINFO_FILETIME);
}
@@ -310,6 +333,7 @@ foreach my $source (@sources) {
else {
print STDERR "transfer error: this source does not have a type\n" if $debug;
}
+ print STDERR "file_sent: $file_sent\n" if $debug;
last if $file_sent && !$merge;
}
@@ -330,24 +354,34 @@ if ($file_sent && $r == 0 && $err =~ m/^2/ && $filename) {
else {
print STDERR "WARNING: containing path for '$cache_path$file' could not be created\n";
}
- _log($logfile, $file, 200, 'MISS');
+ _log($logfile, $file, 200, 'MISS', $file_sent, $ip, $user_agent);
}
else {
unlink($tmp_file);
if ($file_sent) {
if ($filename) {
- _log($logfile, $file, $err, 'MISS_FAIL_SENT');
+ _log($logfile, $file, $err, 'MISS_FAIL_SENT', $file_sent, $ip, $user_agent);
}
else {
# It was actually successful, but paths can't be saved...
print STDERR "NOTICE: paths cant be saved: '$file'\n" if $debug;
- _log($logfile, $file, $err, 'MISS');
+ _log($logfile, $file, $err, 'MISS', $file_sent, $ip, $user_agent);
}
exit 0;
}
- $extra = '_UNMODIFIED' if $file_unmodified;
- return_file($cache_path, $file, $logfile, 'HIT_AFTER_FAIL' . $extra) if $filename && -R $cache_path . $file;
- _log($logfile, $file, 404, 'MISS_FAIL');
+ if ($file_unmodified) {
+ $extra = '_UNMODIFIED';
+ if ($modified_since > 0 && (scalar(@stat) == 0 || $stat[9] <= $modified_since)) {
+ # it's been requested, so we can answer unmodified
+ _log($logfile, $file, 304, 'MISS' . $extra, 0, $ip, $user_agent);
+ return_error(304, 'Unmodified');
+ }
+ }
+ else {
+ $extra = 'AFTER_FAIL';
+ }
+ return_file($cache_path, $file, $logfile, 'HIT' . $extra, \@stat, $ip, $user_agent) if $filename && scalar(@stat) > 0;
+ _log($logfile, $file, 404, 'MISS_FAIL', -1, $ip, $user_agent);
return_error(404, 'File not found');
}
@@ -361,6 +395,7 @@ sub header_function {
$data->{http_header} = $1 if $ptr =~ m!^HTTP/[0-9.]+\s+(.+?)[\s\r\n]*$!;
$data->{content_type} = $1 if $ptr =~ m/^Content-[tT]ype:\s+(.+?)[\s\r\n]*$/;
$data->{size} = $1 if $ptr =~ m/^Content-[lL]ength:\s+(.+?)[\s\r\n]*$/;
+ $data->{date} = $1 if $ptr =~ m/^Last-[mM]odified:\s+(.+?)[\s\r\n]*$/;
$data->{size} = $1 if $ptr =~ m/^213\s+(.+?)[\s\r\n]*$/;
}
return length($ptr);
@@ -371,47 +406,59 @@ sub write_function {
my $f = ${$data->{fh}};
print $f ($ptr);
if (!${$data->{file_sent}}) {
- ${$data->{file_sent}} = 1;
+ ${$data->{file_sent}} = length($ptr);
print STDERR "HTTP header: " . $data->{http_header} . "\n" if $debug && defined $data->{http_header};
print STDERR "Content-Type: " . $data->{content_type} . "\n" if $debug && defined $data->{content_type};
print STDERR "Content-Length: " . $data->{size} . "\n" if $debug;
+ print STDERR "Last-Modified: " . $data->{date} . "\n" if $debug && defined $data->{date};
print "Status: " . $data->{http_header} . "\r\n" if $data->{http_header} && $data->{http_header} !~ m/^2/;
print "Content-Type: " . $data->{content_type} . "\r\n" if $data->{content_type};
print "Content-Length: " . $data->{size} . "\r\n" if $data->{size} > -1 && !${$data->{merge}};
+ print "Last-Modified: " . $data->{date} . "\r\n" if $data->{date};
print "\r\n";
}
+ else {
+ ${$data->{file_sent}} += length($ptr);
+ }
print $ptr;
return length($ptr);
}
sub _log {
- my ($logfile, $file, $code, $cached) = @_;
+ my ($logfile, $file, $code, $cached, $size, $ip, $user_agent) = @_;
my $date = `date`;
$date =~ s/[\s\r\n]*$//;
+ $size = '-' if $size < 0;
open(FILE, ">>" . $logfile) or return;
- print FILE "[" . $date . "] $file - $code - $cached\n";
+ print FILE "[" . $date . "] $ip $code $size $cached '$file' '$user_agent'\n";
close(FILE);
}
sub return_file {
- my ($cache_path, $file, $logfile, $cached)=@_;
+ my ($cache_path, $file, $logfile, $cached, $stat, $ip, $user_agent)=@_;
open(FILE, "<", $cache_path . $file) or do {
- _log($logfile, $file, 500, $cached);
+ _log($logfile, $file, 500, $cached, -1, $ip, $user_agent);
return_error(500, 'Server error');
};
my $ft = `file -b --mime-type $cache_path$file`;
$ft =~ s/[\s\r\n]*$//;
- print STDERR "mimetype: '$ft'\n" if $debug;
- print STDERR "size: " . (-s $cache_path . $file) . "\n" if $debug;
+ my $t = localtime($stat->[9]);
+ print STDERR "HTTP header: 200 OK\n" if $debug;
+ print STDERR "Content-Type: '$ft'\n" if $debug;
+ print STDERR "Content-Length: " . $stat->[7] . "\n" if $debug;
+ print STDERR "Last-Modified: " . $t . "\n" if $debug;
+ print "Status: 200 OK\r\n";
print "Content-Type: " . $ft . "\r\n";
- print "Content-Length: " . (-s $cache_path . $file) . "\r\n\r\n";
+ print "Content-Length: " . $stat->[7] . "\r\n";
+ print "Last-Modified: " . $t . "\r\n";
+ print "\r\n";
binmode(FILE);
my $buf;
while (read(FILE, $buf, 1024)) {
print $buf;
}
close FILE;
- _log($logfile, $file, 200, $cached);
+ _log($logfile, $file, 200, $cached, $stat->[7], $ip, $user_agent);
exit 0;
}
diff --git a/urpmi-proxy.conf b/urpmi-proxy.conf
index 2017b31..780469d 100644
--- a/urpmi-proxy.conf
+++ b/urpmi-proxy.conf
@@ -34,9 +34,9 @@
# $cache_tmp_path = '/var/tmp/urpmi-proxy';
## These files will always be checked for newer versions.
-# $check_updates_only_files = 'MD5SUM';
+# $check_updates_only_files = '(MD5SUM|descriptions)$';
-## These files will not be check for newer versions if they exist.
+## These files will not be checked for newer versions if they exist.
# $check_no_updates_files = undef;
## These files will be merged from every sources.