From 0b89aede6f81df6927f75988939361fe88fc97f1 Mon Sep 17 00:00:00 2001 From: Maarten Vanraes Date: Tue, 28 Feb 2012 02:37:24 +0000 Subject: - update config file - improve logging - implement IF_MODIFIED_SINCE - use lstat when needed - implement Last-Modified header usage - more debug logging - clean up for perl_checker - bump version due to log file changes --- TODO | 2 +- VERSION | 2 +- urpmi-proxy.cgi | 135 +++++++++++++++++++++++++++++++++++++------------------ urpmi-proxy.conf | 4 +- 4 files changed, 95 insertions(+), 48 deletions(-) diff --git a/TODO b/TODO index c4ae1a4..9074350 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,8 @@ urpmi-proxy TODO: ----------------- - - fix sending 304 not modified when requested! - check unmodified for FTP - fix chosen mirrorlist better + - fix non-mirrorlist urpmi - fix rsync - delete old rpm versions (or another way to keep cache small) - better config system diff --git a/VERSION b/VERSION index abd4105..9e11b32 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.2.4 +0.3.1 diff --git a/urpmi-proxy.cgi b/urpmi-proxy.cgi index 12475a0..78b4865 100755 --- a/urpmi-proxy.cgi +++ b/urpmi-proxy.cgi @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -## written by Maarten Vanraes (c) 2009-2011 +## written by Maarten Vanraes (c) 2009-2012 ## urpmi-proxy is GPLv2+ use strict; @@ -12,7 +12,7 @@ my $config_file = '/etc/urpmi-proxy.conf'; my $cache_tmp_path = '/var/tmp/urpmi-proxy'; my $cache_path = '/var/cache/urpmi-proxy'; my $logfile = '/var/log/urpmi-proxy.log'; -my $check_updates_only_files = 'MD5SUM'; +my $check_updates_only_files = '(MD5SUM|descriptions)'; my $check_no_updates_files; my $merge_files = 'media.cfg'; my $sources = [ @@ -28,8 +28,8 @@ if (-R $config_file) { my $r = open(FILE, '<', $config_file); if ($r) { my $l = ''; - while (<FILE>) { - $l .= $_; + while (my $i = <FILE>) { + $l .= $i; } eval $l; close FILE; @@ -37,10 +37,9 @@ if (-R $config_file) { } print STDERR "logfile: $logfile\n" if $debug; -my $sou; print STDERR "orig sources: " . 
scalar(@$sources) . "\n" if $debug; if ($debug) { - foreach $sou (@$sources) { + foreach my $sou (@$sources) { print STDERR " - " . $sou . "\n"; } } @@ -63,6 +62,19 @@ print STDERR "file: $file\n" if $debug; print STDERR "dest_path: $dest_path\n" if $debug; print STDERR "filename: $filename\n" if $debug; +# check if there's a time condition +my $modified_since = 0; +if (defined $ENV{HTTP_IF_MODIFIED_SINCE}) { + $modified_since = `date --date="$ENV{HTTP_IF_MODIFIED_SINCE}" "+%s"` if defined $ENV{HTTP_IF_MODIFIED_SINCE}; + $modified_since =~ s/[\s\r\n]*$//; + print STDERR "is modified since: $ENV{HTTP_IF_MODIFIED_SINCE} ($modified_since) ?\n" if $debug && $modified_since; +} + +# set request ENV vars +my $ip = $ENV{REMOTE_ADDR}; +my $user_agent = ''; +$user_agent = $ENV{HTTP_USER_AGENT} if defined $ENV{HTTP_USER_AGENT}; + # check if request needs update checking my $check_file = 1; $check_file = 0 if defined $check_no_updates_files && $filename =~ m/$check_no_updates_files/; @@ -78,22 +90,18 @@ $check_file = 1 if $merge; print STDERR "check_file: $check_file\n" if $debug; print STDERR "merge: $merge\n" if $debug; -# if the file needs no update checks, check in cache -return_file($cache_path, $file, $logfile, 'CACHED_NO_CHECK') if (!$check_file && $filename && -R $cache_path . $file); - # get datetime from local file if it exists -my $timestamp = 0; -if ($filename && !$check_file && -R $cache_path . $file) { - my @sv = lstat($cache_path . $file); - $timestamp = $sv[9]; - print STDERR "timestamp: $timestamp\n" if $debug; +my @stat = lstat($cache_path . 
$file); +if ($filename && scalar(@stat) > 0) { + print STDERR "timestamp: $stat[9]\n" if $debug; + # if the file needs no update checks, check in cache + return_file($cache_path, $file, $logfile, 'HIT_NO_CHECK', \@stat, $ip, $user_agent) if !$check_file; } # set up curl with timecheck my $curl; my $r = 0; my $file_sent = 0; -#my $file_deleted = 0; my $file_unmodified = 0; my $file_time = -1; my $err = 200; @@ -101,7 +109,7 @@ my $err = 200; # prepare curl transfer my $tmp_file = $cache_tmp_path . "/" . rand() . $$; open(FILEHANDLE, ">", $tmp_file) or do { - _log($logfile, $file, 500, 'MISS'); + _log($logfile, $file, 500, 'MISS', -1, $ip, $user_agent); return_error(500, 'Server error'); }; binmode(FILEHANDLE); @@ -109,7 +117,7 @@ my %curldata = (fh => \*FILEHANDLE, file_sent => \$file_sent, content_type => $f print STDERR "sources: " . scalar(@$sources) . "\n" if $debug; if ($debug) { - foreach $sou (@$sources) { + foreach my $sou (@$sources) { print STDERR " - " . $sou . "\n"; } } @@ -150,7 +158,7 @@ foreach my $s (@$sources) { # check for source print STDERR "interpolated sources: " . scalar(@sources) . "\n" if $debug; if ($debug) { - foreach $sou (@sources) { + foreach my $sou (@sources) { print STDERR " - " . $sou . "\n"; } } @@ -163,8 +171,8 @@ foreach my $source (@sources) { my $res = open(FILE, '<', '/var/cache/urpmi/mirrors.cache'); if ($res) { my $mirrorcache = ''; - while (<FILE>) { - $mirrorcache .= $_; + while (my $i = <FILE>) { + $mirrorcache .= $i; } close FILE; my $host_loc = $loc; @@ -198,8 +206,8 @@ foreach my $source (@sources) { my $res = open(FILE, '<', '/var/cache/urpmi/mirrors.cache'); if ($res) { my $mirrorcache = ''; - while (<FILE>) { - $mirrorcache .= $_; + while (my $i = <FILE>) { + $mirrorcache .= $i; } close FILE; my $loc_host = $loc; @@ -229,20 +237,29 @@ foreach my $source (@sources) { } } if ($type eq 'file') { - if ($filename && -R $loc . $file) { + my @statl = lstat($loc . 
$file); + if ($filename && scalar(@statl) > 0) { my $ft = `file -b --mime-type $loc$file`; $ft =~ s/[\s\r\n]*$//; - print STDERR "mimetype: '$ft'\n" if $debug; - print STDERR "size: " . (-s $loc . $file) . "\n" if $debug; + my $t = localtime($statl[9]); + print STDERR "HTTP Header: 200 OK\n" if $debug; + print STDERR "Content-Type: $ft\n" if $debug; + print STDERR "Content-Length: " . $statl[7] . "\n" if $debug; + print STDERR "Last-Modified: " . $t . "\n" if $debug; $r = open(FILE, "<", $loc . $file); if ($r) { print STDERR "file fetch url '" . $loc . $file . "'\n" if $debug; if (!$file_sent) { - $file_sent = 1; + $file_sent = $statl[7]; + print "Status: 200 OK\r\n"; print "Content-Type: " . $ft . "\r\n"; - print "Content-Length: " . (-s $loc . $file) . "\r\n" if !$merge; + print "Content-Length: " . $statl[7] . "\r\n" if !$merge; + print "Last-Modified: " . $t . "\r\n"; print "\r\n"; } + else { + $file_sent += $statl[7]; + } binmode(FILE); my $buf; while (read(FILE, $buf, 1024)) { @@ -267,10 +284,15 @@ foreach my $source (@sources) { use WWW::Curl::Easy; # set up curl stuff $curl = new WWW::Curl::Easy; - if ($timestamp > 0) { + if (scalar(@stat) > 0 && $stat[9] > $modified_since) { + $curl->setopt(CURLOPT_TIMECONDITION, 1); # CURL_TIMECOND_IFMODSINCE + $curl->setopt(CURLOPT_TIMEVALUE, $stat[9]); + } + elsif ($modified_since > 0) { $curl->setopt(CURLOPT_TIMECONDITION, 1); # CURL_TIMECOND_IFMODSINCE - $curl->setopt(CURLOPT_TIMEVALUE, $timestamp); + $curl->setopt(CURLOPT_TIMEVALUE, $modified_since); } + $curl->setopt(CURLOPT_USERAGENT, $user_agent) if $user_agent; $curl->setopt(CURLOPT_CONNECTTIMEOUT, $connect_timeout); $curl->setopt(CURLOPT_FTP_RESPONSE_TIMEOUT, $ftp_response_timeout); $curl->setopt(CURLOPT_LOW_SPEED_LIMIT, $max_stall_speed); @@ -294,6 +316,7 @@ foreach my $source (@sources) { if ($err =~ m/^2/ || $err == 304) { if ($curl->getinfo(CURLINFO_CONDITION_UNMET)) { $file_unmodified = 1; + print STDERR "condition unmet\n" if $debug; } $file_time = 
$curl->getinfo(CURLINFO_FILETIME); } @@ -310,6 +333,7 @@ foreach my $source (@sources) { else { print STDERR "transfer error: this source does not have a type\n" if $debug; } + print STDERR "file_sent: $file_sent\n" if $debug; last if $file_sent && !$merge; } @@ -330,24 +354,34 @@ if ($file_sent && $r == 0 && $err =~ m/^2/ && $filename) { else { print STDERR "WARNING: containing path for '$cache_path$file' could not be created\n"; } - _log($logfile, $file, 200, 'MISS'); + _log($logfile, $file, 200, 'MISS', $file_sent, $ip, $user_agent); } else { unlink($tmp_file); if ($file_sent) { if ($filename) { - _log($logfile, $file, $err, 'MISS_FAIL_SENT'); + _log($logfile, $file, $err, 'MISS_FAIL_SENT', $file_sent, $ip, $user_agent); } else { # It was actually successful, but paths can't be saved... print STDERR "NOTICE: paths cant be saved: '$file'\n" if $debug; - _log($logfile, $file, $err, 'MISS'); + _log($logfile, $file, $err, 'MISS', $file_sent, $ip, $user_agent); } exit 0; } - $extra = '_UNMODIFIED' if $file_unmodified; - return_file($cache_path, $file, $logfile, 'HIT_AFTER_FAIL' . $extra) if $filename && -R $cache_path . $file; - _log($logfile, $file, 404, 'MISS_FAIL'); + if ($file_unmodified) { + $extra = '_UNMODIFIED'; + if ($modified_since > 0 && (scalar(@stat) == 0 || $stat[9] <= $modified_since)) { + # it's been requested, so we can answer unmodified + _log($logfile, $file, 304, 'MISS' . $extra, 0, $ip, $user_agent); + return_error(304, 'Unmodified'); + } + } + else { + $extra = 'AFTER_FAIL'; + } + return_file($cache_path, $file, $logfile, 'HIT' . 
$extra, \@stat, $ip, $user_agent) if $filename && scalar(@stat) > 0; + _log($logfile, $file, 404, 'MISS_FAIL', -1, $ip, $user_agent); return_error(404, 'File not found'); } @@ -361,6 +395,7 @@ sub header_function { $data->{http_header} = $1 if $ptr =~ m!^HTTP/[0-9.]+\s+(.+?)[\s\r\n]*$!; $data->{content_type} = $1 if $ptr =~ m/^Content-[tT]ype:\s+(.+?)[\s\r\n]*$/; $data->{size} = $1 if $ptr =~ m/^Content-[lL]ength:\s+(.+?)[\s\r\n]*$/; + $data->{date} = $1 if $ptr =~ m/^Last-[mM]odified:\s+(.+?)[\s\r\n]*$/; $data->{size} = $1 if $ptr =~ m/^213\s+(.+?)[\s\r\n]*$/; } return length($ptr); @@ -371,47 +406,59 @@ sub write_function { my $f = ${$data->{fh}}; print $f ($ptr); if (!${$data->{file_sent}}) { - ${$data->{file_sent}} = 1; + ${$data->{file_sent}} = length($ptr); print STDERR "HTTP header: " . $data->{http_header} . "\n" if $debug && defined $data->{http_header}; print STDERR "Content-Type: " . $data->{content_type} . "\n" if $debug && defined $data->{content_type}; print STDERR "Content-Length: " . $data->{size} . "\n" if $debug; + print STDERR "Last-Modified: " . $data->{date} . "\n" if $debug && defined $data->{date}; print "Status: " . $data->{http_header} . "\r\n" if $data->{http_header} && $data->{http_header} !~ m/^2/; print "Content-Type: " . $data->{content_type} . "\r\n" if $data->{content_type}; print "Content-Length: " . $data->{size} . "\r\n" if $data->{size} > -1 && !${$data->{merge}}; + print "Last-Modified: " . $data->{date} . "\r\n" if $data->{date}; print "\r\n"; } + else { + ${$data->{file_sent}} += length($ptr); + } print $ptr; return length($ptr); } sub _log { - my ($logfile, $file, $code, $cached) = @_; + my ($logfile, $file, $code, $cached, $size, $ip, $user_agent) = @_; my $date = `date`; $date =~ s/[\s\r\n]*$//; + $size = '-' if $size < 0; open(FILE, ">>" . $logfile) or return; - print FILE "[" . $date . "] $file - $code - $cached\n"; + print FILE "[" . $date . 
"] $ip $code $size $cached '$file' '$user_agent'\n"; close(FILE); } sub return_file { - my ($cache_path, $file, $logfile, $cached)=@_; + my ($cache_path, $file, $logfile, $cached, $stat, $ip, $user_agent)=@_; open(FILE, "<", $cache_path . $file) or do { - _log($logfile, $file, 500, $cached); + _log($logfile, $file, 500, $cached, -1, $ip, $user_agent); return_error(500, 'Server error'); }; my $ft = `file -b --mime-type $cache_path$file`; $ft =~ s/[\s\r\n]*$//; - print STDERR "mimetype: '$ft'\n" if $debug; - print STDERR "size: " . (-s $cache_path . $file) . "\n" if $debug; + my $t = localtime($stat->[9]); + print STDERR "HTTP header: 200 OK\n" if $debug; + print STDERR "Content-Type: '$ft'\n" if $debug; + print STDERR "Content-Length: " . $stat->[7] . "\n" if $debug; + print STDERR "Last-Modified: " . $t . "\n" if $debug; + print "Status: 200 OK\r\n"; print "Content-Type: " . $ft . "\r\n"; - print "Content-Length: " . (-s $cache_path . $file) . "\r\n\r\n"; + print "Content-Length: " . $stat->[7] . "\r\n"; + print "Last-Modified: " . $t . "\r\n"; + print "\r\n"; binmode(FILE); my $buf; while (read(FILE, $buf, 1024)) { print $buf; } close FILE; - _log($logfile, $file, 200, $cached); + _log($logfile, $file, 200, $cached, $stat->[7], $ip, $user_agent); exit 0; } diff --git a/urpmi-proxy.conf b/urpmi-proxy.conf index 2017b31..780469d 100644 --- a/urpmi-proxy.conf +++ b/urpmi-proxy.conf @@ -34,9 +34,9 @@ # $cache_tmp_path = '/var/tmp/urpmi-proxy'; ## These files will always be checked for newer versions. -# $check_updates_only_files = 'MD5SUM'; +# $check_updates_only_files = '(MD5SUM|descriptions)$'; -## These files will not be check for newer versions if they exist. +## These files will not be checked for newer versions if they exist. # $check_no_updates_files = undef; ## These files will be merged from every sources. -- cgit v1.2.1