#!/usr/bin/perl -w
## written by Maarten Vanraes (c) 2009-2011
## urpmi-proxy is GPLv2+
##
## urpmi-proxy.cgi, version 0.2.4
## (imported by Maarten Vanraes, Sun, 26 Feb 2012 16:11:40 +0000,
##  commit ef020d58538abb702b8181be990dba6424019b89)
##
## CGI script that serves the file named by PATH_INFO:
##   * files that need no update check are served straight from $cache_path;
##   * otherwise every configured source is tried in turn ('urpmi' expands to
##     the media configured for urpmi; mirrorlist:// and rsync:// sources are
##     resolved through the urpmi mirror cache; file:// is read locally;
##     everything else is fetched with curl).  The data is streamed to the
##     client and written to a temporary file at the same time, and the
##     temporary file is moved into the cache after a successful transfer;
##   * when no source delivers the file, a cached copy is served if present,
##     otherwise a 404 is returned.

use strict;
use warnings;

use File::Copy qw(move);
use File::Path qw(make_path);
use POSIX qw(strftime);

# "use" is resolved at compile time, so these modules were always loaded even
# though they used to be written inside the branches that need them.
use urpm;
use urpm::cfg;
use WWW::Curl::Easy;

# number of bytes copied per read() when streaming local files
use constant IO_CHUNK_SIZE => 65536;

my $debug = 0;
my $config_file = '/etc/urpmi-proxy.conf';
my $mirror_cache_file = '/var/cache/urpmi/mirrors.cache';

# config defaults
my $cache_tmp_path = '/var/tmp/urpmi-proxy';
my $cache_path = '/var/cache/urpmi-proxy';
my $logfile = '/var/log/urpmi-proxy.log';
my $check_updates_only_files = 'MD5SUM';
my $check_no_updates_files;
my $merge_files = 'media.cfg';
my $sources = [
    'urpmi'
];
my $connect_timeout = 120;
my $ftp_response_timeout = 30;
my $max_stall_speed = 8192;
my $max_stall_time = 60;

# load config file
# The config file is Perl code that assigns to the lexicals declared above,
# so it has to be run with a string eval at file scope.
# SECURITY: this executes whatever is in $config_file; the file must only be
# writable by trusted users.
if (-R $config_file) {
    my $config_code = _slurp_file($config_file);
    if (defined $config_code) {
        eval $config_code;
        print STDERR "urpmi-proxy: error in config file '$config_file': $@\n" if $@;
    }
}
print STDERR "logfile: $logfile\n" if $debug;

_debug_sources('orig sources', @$sources);

# prepare cache path
# (failures are deliberately not fatal here: opening the temporary file below
# reports the problem as a logged 500)
make_path($cache_tmp_path, { error => \my $tmp_path_errors });

# check for valid request
my $file = $ENV{PATH_INFO};
return_error(500, 'Server error') if !$file;

# split up request
my ($dest_path, $filename) = $file =~ m!^(.+)/([^/]*)$!;
return_error(500, 'Server error') if !defined $filename;

# PATH_INFO is client-controlled and is appended to local directories below;
# refuse anything that could climb out of them.
return_error(400, 'Bad request') if $file =~ m{(?:^|/)\.\.(?:/|\z)} || $file =~ m/\0/;

my $file_type = "";
my $merge = 0;

print STDERR "file: $file\n" if $debug;
print STDERR "dest_path: $dest_path\n" if $debug;
print STDERR "filename: $filename\n" if $debug;

# check if request needs update checking
my $check_file = 1;
$check_file = 0 if defined $check_no_updates_files && $filename =~ m/$check_no_updates_files/;
if (defined $check_updates_only_files) {
    $check_file = 0;
    $check_file = 1 if $filename =~ m/$check_updates_only_files/;
}

# check if request needs merging
$merge = 1 if $filename =~ m/$merge_files/;
$check_file = 1 if $merge;

print STDERR "check_file: $check_file\n" if $debug;
print STDERR "merge: $merge\n" if $debug;

my $cached_file = $cache_path . $file;

# if the file needs no update checks, check in cache
return_file($cache_path, $file, $logfile, 'CACHED_NO_CHECK') if (!$check_file && $filename && -R $cached_file);

# get datetime from local file if it exists
# NOTE(review): this condition can never hold at this point -- whenever it is
# true, the return_file() call above has already exited -- so $timestamp stays
# 0 and the time condition below is never armed.  "$check_file" may have been
# intended instead of "!$check_file"; left unchanged pending confirmation.
my $timestamp = 0;
if ($filename && !$check_file && -R $cached_file) {
    my @sv = lstat($cached_file);
    $timestamp = $sv[9];
    print STDERR "timestamp: $timestamp\n" if $debug;
}

# set up curl with timecheck
my $curl;
my $r = 0;                  # result of the last transfer attempt (0 = ok)
my $file_sent = 0;          # true once response headers went to the client
#my $file_deleted = 0;
my $file_unmodified = 0;
my $file_time = -1;         # remote modification time reported by curl
my $err = 200;              # protocol status code of the last curl transfer

# prepare curl transfer
my $tmp_file = $cache_tmp_path . "/" . rand() . $$;
open(my $cache_fh, '>', $tmp_file) or do {
    _log($logfile, $file, 500, 'MISS');
    return_error(500, 'Server error');
};
binmode($cache_fh);
# state shared with the curl callbacks (header_function / write_function)
my %curldata = (fh => $cache_fh, file_sent => \$file_sent, content_type => $file_type, size => -1, merge => \$merge);

_debug_sources('sources', @$sources);

# filter out duplicate sources (and expand urpmi)
my @sources;
my %seen;
foreach my $s (@$sources) {
    # heh
    next if $seen{$s}++;
    if ($s eq "urpmi") {
        # urpmi support is required
        my $urpm = urpm->new;
        urpm::get_global_options($urpm);
        my $config = urpm::cfg::load_config($urpm->{config});
        my %media_urls;
        foreach my $media (@{$config->{media}}) {
            next if $media->{ignore};
            if ($media->{mirrorlist} && !($seen{'mirrorlist:' . $media->{mirrorlist}}++)) {
                # push mirrorlists now so they'll be first
                push @sources, 'mirrorlist://' . $media->{mirrorlist};
            }
            elsif ($media->{url} && !($seen{$media->{url}}++)) {
                $media_urls{$media->{url}} = 1;
            }
        }
        # push the urls
        push @sources, keys %media_urls;
    }
    else {
        push @sources, $s;
    }
}

# check for source
_debug_sources('interpolated sources', @sources);

foreach my $source (@sources) {
    my ($type, $loc) = _split_source($source);
    # a source without any text has no type; it is reported as such below
    $type = '' if !defined $type;
    print STDERR "source of type $type: '" . $source . "'\n" if $debug;

    if ($type eq 'mirrorlist') {
        # get exact url from cache and parse
        my $mirrorcache = _slurp_file($mirror_cache_file);
        if (defined $mirrorcache) {
            # the mirrorlist name is data, not a pattern: quote all of it
            if ($mirrorcache =~ m/'\Q$loc\E'\s+=>\s+\{[\r\n]+\s+'chosen'\s+=>\s+'([^']+)'/m) {
                $source = $1;
                # rectify source to remove '/distrib/version/arch'
                $source =~ s!/[^/]+/[^/]+/[^/]+$!!;
                print STDERR "mirrorlist returns source '$source'\n" if $debug;
                ($type, $loc) = _split_source($source);
                if (defined $type) {
                    print STDERR "mirrorlist returns type $type: '" . $source . "'\n" if $debug;
                }
                else {
                    print STDERR "transfer error: mirrorlist is no url '" . $source . "'.\n" if $debug;
                    $type = '';
                }
            }
            else {
                print STDERR "transfer error: mirrorlist has no chosen url '" . $source . "'.\n" if $debug;
                $type = '';
            }
        }
        else {
            print STDERR "transfer error: couldn't open mirrorlist cache.\n" if $debug;
        }
    }

    if ($type eq 'rsync') {
        # find the equivalent ftp mirror location by hostname
        my $mirrorcache = _slurp_file($mirror_cache_file);
        if (defined $mirrorcache) {
            my $loc_host = $loc;
            $loc_host =~ s!/.+!!;
            # the host name is data, not a pattern: quote all of it
            if ($mirrorcache =~ m!'url'\s+=>\s+'((ftp|http)://\Q$loc_host\E/[^']+)'!) {
                $source = $1;
                # rectify source to remove '/distrib/version/arch'
                $source =~ s!/[^/]+/[^/]+/[^/]+$!!;
                print STDERR "rsync switch returns source '$source'\n" if $debug;
                ($type, $loc) = _split_source($source);
                if (defined $type) {
                    print STDERR "rsync switch returns type $type: '" . $source . "'\n" if $debug;
                }
                else {
                    print STDERR "transfer error: rsync switch is no url '" . $source . "'.\n" if $debug;
                    $type = '';
                }
            }
            else {
                print STDERR "transfer error: rsync switch has no suitable url '" . $source . "'.\n" if $debug;
                $type = '';
            }
        }
        else {
            print STDERR "transfer error: couldn't open mirrorlist cache.\n" if $debug;
        }
    }

    if ($type eq 'file') {
        my $local_path = $loc . $file;
        if ($filename && -R $local_path) {
            my $ft = _mime_type($local_path);
            print STDERR "mimetype: '$ft'\n" if $debug;
            print STDERR "size: " . (-s $local_path) . "\n" if $debug;
            if (open(my $src_fh, '<', $local_path)) {
                print STDERR "file fetch url '" . $local_path . "'\n" if $debug;
                if (!$file_sent) {
                    $file_sent = 1;
                    print "Content-Type: " . $ft . "\r\n";
                    # a merged response is a concatenation; its length is unknown
                    print "Content-Length: " . (-s $local_path) . "\r\n" if !$merge;
                    print "\r\n";
                }
                binmode($src_fh);
                my $buf;
                # copy to the cache file and to the client at the same time
                while (read($src_fh, $buf, IO_CHUNK_SIZE)) {
                    print {$cache_fh} $buf;
                    print $buf;
                }
                close $src_fh;
                $r = 0;
            }
            else {
                print STDERR "transfer error: couldn't open file '" . $local_path . "'.\n" if $debug;
                $r = 1;
                $err = 404;
            }
        }
        else {
            print STDERR "transfer error: couldn't read file '" . $local_path . "'.\n" if $debug;
        }
    }
    elsif ($type) {
        if (!defined $curl) {
            # set up curl stuff
            $curl = WWW::Curl::Easy->new;
            if ($timestamp > 0) {
                $curl->setopt(CURLOPT_TIMECONDITION, 1); # CURL_TIMECOND_IFMODSINCE
                $curl->setopt(CURLOPT_TIMEVALUE, $timestamp);
            }
            $curl->setopt(CURLOPT_CONNECTTIMEOUT, $connect_timeout);
            $curl->setopt(CURLOPT_FTP_RESPONSE_TIMEOUT, $ftp_response_timeout);
            $curl->setopt(CURLOPT_LOW_SPEED_LIMIT, $max_stall_speed);
            $curl->setopt(CURLOPT_LOW_SPEED_TIME, $max_stall_time);
            $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
            $curl->setopt(CURLOPT_FILETIME, 1);
            # hook curl transfer functions for local caching
            $curl->setopt(CURLOPT_WRITEDATA, \%curldata);
            $curl->setopt(CURLOPT_WRITEFUNCTION, \&write_function);
            $curl->setopt(CURLOPT_WRITEHEADER, \%curldata);
            $curl->setopt(CURLOPT_HEADERFUNCTION, \&header_function);
        }
        # depending on type check if remote file is newer
        print STDERR "curl fetch url '" . $source . $file . "'\n" if $debug;
        $curl->setopt(CURLOPT_URL, $source . $file);
        $r = $curl->perform;
        print STDERR "curl return value: " . $r . "\n" if $debug;
        # use curl to get it and output it directly
        if ($r == 0) {
            $err = $curl->getinfo(CURLINFO_HTTP_CODE);
            if ($err =~ m/^2/ || $err == 304) {
                if ($curl->getinfo(CURLINFO_CONDITION_UNMET)) {
                    $file_unmodified = 1;
                }
                $file_time = $curl->getinfo(CURLINFO_FILETIME);
            }
            else {
                # error stuff ?
                print STDERR "transfer error: http code " . $err . "\n" if $debug;
            }
        }
        else {
            # error stuff ?
            print STDERR "transfer error: " . $curl->strerror($r) . " ($r)\n" if $debug;
        }
    }
    else {
        print STDERR "transfer error: this source does not have a type\n" if $debug;
    }

    # merged files collect the data of every source; others stop at the first
    last if $file_sent && !$merge;
}

my $extra = '';

close($cache_fh);

if ($file_sent && $r == 0 && $err =~ m/^2/ && $filename) {
    # clean up file and move to correct location
    make_path($cache_path . $dest_path, { error => \my $cache_dir_errors });
    if (!@$cache_dir_errors) {
        # move() renames when possible and copies when the temporary directory
        # and the cache live on different filesystems
        if (move($tmp_file, $cached_file)) {
            utime(time(), $file_time, $cached_file) if $file_time > 0;
        }
        else {
            print STDERR "WARNING: file '$tmp_file' could not be moved to '$cache_path$file'\n";
            unlink($tmp_file);
        }
    }
    else {
        print STDERR "WARNING: containing path for '$cache_path$file' could not be created\n";
        unlink($tmp_file);
    }
    _log($logfile, $file, 200, 'MISS');
}
else {
    unlink($tmp_file);
    if ($file_sent) {
        if ($filename) {
            _log($logfile, $file, $err, 'MISS_FAIL_SENT');
        }
        else {
            # It was actually successful, but paths can't be saved...
            print STDERR "NOTICE: paths cant be saved: '$file'\n" if $debug;
            _log($logfile, $file, $err, 'MISS');
        }
        exit 0;
    }
    $extra = '_UNMODIFIED' if $file_unmodified;
    return_file($cache_path, $file, $logfile, 'HIT_AFTER_FAIL' . $extra) if $filename && -R $cached_file;
    _log($logfile, $file, 404, 'MISS_FAIL');
    return_error(404, 'File not found');
}

print STDERR "finished." if $debug;

exit 0;

# curl header callback: remembers the status line, content type and size of
# the response in the shared state hash until the body starts being sent.
#   $ptr  - one raw header line
#   $data - the shared state hash (%curldata)
# Returns the number of bytes handled, as curl expects.
sub header_function {
    my ($ptr, $data) = @_;
    if (!${$data->{file_sent}}) {
        $data->{http_header} = $1 if $ptr =~ m!^HTTP/[0-9.]+\s+(.+?)[\s\r\n]*$!;
        $data->{content_type} = $1 if $ptr =~ m/^Content-[tT]ype:\s+(.+?)[\s\r\n]*$/;
        $data->{size} = $1 if $ptr =~ m/^Content-[lL]ength:\s+(.+?)[\s\r\n]*$/;
        # a "213 <value>" reply line is also taken as the size
        $data->{size} = $1 if $ptr =~ m/^213\s+(.+?)[\s\r\n]*$/;
    }
    return length($ptr);
}

# curl body callback: writes each chunk to the temporary cache file and to the
# client; the CGI response headers are emitted in front of the first chunk.
#   $ptr  - one chunk of body data
#   $data - the shared state hash (%curldata)
# Returns the number of bytes handled, as curl expects.
sub write_function {
    my ($ptr, $data) = @_;
    print { $data->{fh} } $ptr;
    if (!${$data->{file_sent}}) {
        ${$data->{file_sent}} = 1;
        print STDERR "HTTP header: " . $data->{http_header} . "\n" if $debug && defined $data->{http_header};
        print STDERR "Content-Type: " . $data->{content_type} . "\n" if $debug && defined $data->{content_type};
        print STDERR "Content-Length: " . $data->{size} . "\n" if $debug;
        print "Status: " . $data->{http_header} . "\r\n" if $data->{http_header} && $data->{http_header} !~ m/^2/;
        print "Content-Type: " . $data->{content_type} . "\r\n" if $data->{content_type};
        # a merged response is a concatenation; its length is unknown
        print "Content-Length: " . $data->{size} . "\r\n" if $data->{size} > -1 && !${$data->{merge}};
        print "\r\n";
    }
    print $ptr;
    return length($ptr);
}

# Appends one line "[date] file - code - cached" to $logfile.
# Logging is best effort: nothing happens when the log cannot be opened.
sub _log {
    my ($logfile, $file, $code, $cached) = @_;
    # same layout as the output of date(1) in the C locale
    my $date = strftime('%a %b %e %H:%M:%S %Z %Y', localtime);
    # the request path is client-controlled: keep it on a single log line
    (my $logged_file = $file) =~ tr/\r\n/ /;
    open(my $log_fh, '>>', $logfile) or return;
    print {$log_fh} "[" . $date . "] $logged_file - $code - $cached\n";
    close($log_fh);
    return;
}

# Sends $cache_path . $file to the client with Content-Type and
# Content-Length headers, logs the request with the tag $cached and exits.
# Answers with a (logged) 500 when the file cannot be opened.
# Never returns.
sub return_file {
    my ($cache_path, $file, $logfile, $cached) = @_;
    my $path = $cache_path . $file;
    open(my $fh, '<', $path) or do {
        _log($logfile, $file, 500, $cached);
        return_error(500, 'Server error');
    };
    my $ft = _mime_type($path);
    print STDERR "mimetype: '$ft'\n" if $debug;
    print STDERR "size: " . (-s $path) . "\n" if $debug;
    print "Content-Type: " . $ft . "\r\n";
    print "Content-Length: " . (-s $path) . "\r\n\r\n";
    binmode($fh);
    my $buf;
    while (read($fh, $buf, IO_CHUNK_SIZE)) {
        print $buf;
    }
    close $fh;
    _log($logfile, $file, 200, $cached);
    exit 0;
}

# Sends a bare "Status: code text" CGI response and exits.
# Never returns.
sub return_error {
    my ($code, $text) = @_;
    print "Status: $code $text\r\n\r\n";
    print STDERR "$code $text.\n" if $debug;
    exit 0;
}

# Returns the whole content of $path as one string ('' for an empty file),
# or undef when the file cannot be opened.
sub _slurp_file {
    my ($path) = @_;
    open(my $fh, '<', $path) or return;
    my $content = do { local $/; <$fh> };
    close $fh;
    return defined $content ? $content : '';
}

# Returns the MIME type that file(1) reports for $path.  The command is run
# without a shell, so the path is never interpreted as shell syntax.  Falls
# back to 'application/octet-stream' when file(1) gives no answer.
sub _mime_type {
    my ($path) = @_;
    my $type = '';
    if (open(my $ph, '-|', 'file', '-b', '--mime-type', '--', $path)) {
        my $output = do { local $/; <$ph> };
        close $ph;
        $type = $output if defined $output;
    }
    $type =~ s/[\s\r\n]*$//;
    return $type ne '' ? $type : 'application/octet-stream';
}

# Splits "type://location" into (type, location).  The type is undef when
# $source is empty; the location is '' when there is no '://'.
sub _split_source {
    my ($source) = @_;
    my ($type, @rest) = split m{://}, $source;
    return ($type, join('://', @rest));
}

# Debug aid: prints "$label: <count>" followed by one " - <source>" line per
# entry to STDERR.  Does nothing unless $debug is set.
sub _debug_sources {
    my ($label, @list) = @_;
    return if !$debug;
    print STDERR "$label: " . scalar(@list) . "\n";
    foreach my $entry (@list) {
        print STDERR " - " . $entry . "\n";
    }
    return;
}