From 6a4e1eca43b969db22ec8eb5d0f6aa012dec9198 Mon Sep 17 00:00:00 2001 From: Maarten Vanraes Date: Tue, 5 Aug 2014 20:40:55 +0200 Subject: Handle custom HTML errors from mirrors. Some mirrors send errors back with custom HTML instead of a body-less response. From now on, we keep a small buffer to see whether a response is an error rather than a functional response. If it is an error, it is not sent to the client, so that the next mirror can be tried. Also tidy up the code in a few subroutines and add subsecond logging in debug mode so that we can tell where the slowness comes from. --- VERSION | 2 +- urpmi-proxy.cgi | 93 +++++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 81 insertions(+), 14 deletions(-) diff --git a/VERSION b/VERSION index 42045ac..1d0ba9e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.3.4 +0.4.0 diff --git a/urpmi-proxy.cgi b/urpmi-proxy.cgi index 572835b..e1fcf89 100755 --- a/urpmi-proxy.cgi +++ b/urpmi-proxy.cgi @@ -5,6 +5,8 @@ use strict; use warnings; +use Time::HiRes qw( gettimeofday ); + my $debug = 0; my $proxy = 0; my $config_file = '/etc/urpmi-proxy.conf'; @@ -23,6 +25,7 @@ my $connect_timeout = 120; my $ftp_response_timeout = 30; my $max_stall_speed = 8192; my $max_stall_time = 60; +my $start_buffer = 4096; # load config file if (-R $config_file) { @@ -106,6 +109,8 @@ my $file_sent = 0; my $file_unmodified = 0; my $file_time = -1; my $err = 200; +my $buffer; +my $pos; # prepare curl transfer my $tmp_file = $cache_tmp_path . "/" . rand() . $$; @@ -114,7 +119,7 @@ open(FILEHANDLE, ">", $tmp_file) or do { return_error(500, 'Server error'); }; binmode(FILEHANDLE); -my %curldata = (fh => \*FILEHANDLE, file_sent => \$file_sent, content_type => $file_type, size => -1, merge => \$merge); +my %curldata = (fh => \*FILEHANDLE, file_sent => \$file_sent, content_type => $file_type, size => -1, merge => \$merge, buffer => \$buffer, pos => \$pos, max => $start_buffer); print STDERR "sources: " . scalar(@$sources) . 
"\n" if $debug; if ($debug) { @@ -165,6 +170,9 @@ if ($debug) { } foreach my $source (@sources) { my ($type, @loc) = split('://', $source); + # clear buffer + $buffer = ''; + $pos = 0; my $loc = join('://', @loc); print STDERR "source of type $type: '" . $source . "'\n" if $debug; if ($type eq 'mirrorlist') { @@ -321,6 +329,7 @@ foreach my $source (@sources) { print STDERR "condition unmet\n" if $debug; } $file_time = $curl->getinfo(CURLINFO_FILETIME); + flush_buffer(\%curldata); } else { # error stuff ? @@ -338,6 +347,12 @@ foreach my $source (@sources) { print STDERR "file_sent: $file_sent\n" if $debug; last if $file_sent && !$merge; } +# if file_sent, flush the buffer +if ($file_sent && $pos > 0) { + print STDERR "still in buffer: $pos\n" if $debug; + flush_buffer(\%curldata); + print STDERR "file_sent(after flush): $file_sent\n" if $debug; +} my $extra = ''; @@ -403,27 +418,79 @@ sub header_function { return length($ptr); } +sub print_headers { + my ($data) = @_; + print STDERR "HTTP header: " . $data->{http_header} . "\n" if $debug && defined $data->{http_header}; + print STDERR "Content-Type: " . $data->{content_type} . "\n" if $debug && defined $data->{content_type}; + print STDERR "Content-Length: " . $data->{size} . "\n" if $debug; + print STDERR "Last-Modified: " . $data->{date} . "\n" if $debug && defined $data->{date}; + print "Status: " . $data->{http_header} . "\r\n" if $data->{http_header} && $data->{http_header} !~ m/^2/; + print "Content-Type: " . $data->{content_type} . "\r\n" if $data->{content_type}; + print "Content-Length: " . $data->{size} . "\r\n" if $data->{size} > -1 && !${$data->{merge}}; + print "Last-Modified: " . $data->{date} . 
"\r\n" if $data->{date}; + print "\r\n"; +} + +sub flush_buffer { + my ($data) = @_; + if (${$data->{pos}} > 0) { + print STDERR "flushing buffer\n" if $debug; + my $f = ${$data->{fh}}; + # output to file + print $f (${$data->{buffer}}); + # set file_sent and print headers if required + if (!${$data->{file_sent}}) { + ${$data->{file_sent}} = ${$data->{pos}}; + print_headers($data); + } + else { + ${$data->{file_sent}} += ${$data->{pos}}; + } + # output + print ${$data->{buffer}}; + # clear buffer + ${$data->{buffer}} = ''; + ${$data->{pos}} = 0; + } +} + sub write_function { my ($ptr, $data) = @_; + my $l = length($ptr); my $f = ${$data->{fh}}; + my ($sec, $usec) = gettimeofday; + print STDERR "[$sec,$usec]: write($l,${$data->{pos}},$data->{max},${$data->{file_sent}})\n" if $debug; + # if not sent yet and we can fit in the data in the buffer, we'll use the buffer instead and do nothing + if (!${$data->{file_sent}} && (${$data->{pos}} + $l < $data->{max})) { + ${$data->{buffer}} .= $ptr; + ${$data->{pos}} += $l; + return $l; + } + ($sec, $usec) = gettimeofday; + print STDERR "[$sec,$usec]: write: flush\n" if $debug; + # now that we are printing, flush the buffer + flush_buffer($data); + ($sec, $usec) = gettimeofday; + print STDERR "[$sec,$usec]: write: print file\n" if $debug; + # output to file print $f ($ptr); + # set file_sent and print headers if required if (!${$data->{file_sent}}) { - ${$data->{file_sent}} = length($ptr); - print STDERR "HTTP header: " . $data->{http_header} . "\n" if $debug && defined $data->{http_header}; - print STDERR "Content-Type: " . $data->{content_type} . "\n" if $debug && defined $data->{content_type}; - print STDERR "Content-Length: " . $data->{size} . "\n" if $debug; - print STDERR "Last-Modified: " . $data->{date} . "\n" if $debug && defined $data->{date}; - print "Status: " . $data->{http_header} . "\r\n" if $data->{http_header} && $data->{http_header} !~ m/^2/; - print "Content-Type: " . $data->{content_type} . 
"\r\n" if $data->{content_type}; - print "Content-Length: " . $data->{size} . "\r\n" if $data->{size} > -1 && !${$data->{merge}}; - print "Last-Modified: " . $data->{date} . "\r\n" if $data->{date}; - print "\r\n"; + ($sec, $usec) = gettimeofday; + print STDERR "[$sec,$usec]: write: headers\n" if $debug; + ${$data->{file_sent}} = $l; + print_headers($data); } else { - ${$data->{file_sent}} += length($ptr); + ${$data->{file_sent}} += $l; } + ($sec, $usec) = gettimeofday; + print STDERR "[$sec,$usec]: write: print STDOUT\n" if $debug; + # output print $ptr; - return length($ptr); + ($sec, $usec) = gettimeofday; + print STDERR "[$sec,$usec]: write: return\n" if $debug; + return $l; } sub _log { -- cgit v1.2.1