Diffstat (limited to 'modules')
-rw-r--r--  modules/apache/templates/logrotate                 |  2
-rw-r--r--  modules/apache/templates/vhost_fcgid.conf          | 35
-rw-r--r--  modules/apache/templates/vhost_fcgid_norobot.conf  | 45
-rwxr-xr-x  modules/bugzilla/manifests/init.pp                 | 13
-rw-r--r--  modules/viewvc/files/setcookieredirect.html        |  4
-rw-r--r--  modules/viewvc/manifests/init.pp                   |  4
-rw-r--r--  modules/youri-check/manifests/init.pp              |  8
7 files changed, 61 insertions, 50 deletions
diff --git a/modules/apache/templates/logrotate b/modules/apache/templates/logrotate
index 4d90e47e..823989eb 100644
--- a/modules/apache/templates/logrotate
+++ b/modules/apache/templates/logrotate
@@ -4,7 +4,7 @@
daily
<% elsif @hostname == 'friteuse' %>
# The virtual disk is very small so keep log sizes down
- rotate 52
+ rotate 26
weekly
<% elsif @hostname == 'sucuk' %>
rotate 52
diff --git a/modules/apache/templates/vhost_fcgid.conf b/modules/apache/templates/vhost_fcgid.conf
index f137c866..fefa4a49 100644
--- a/modules/apache/templates/vhost_fcgid.conf
+++ b/modules/apache/templates/vhost_fcgid.conf
@@ -4,38 +4,3 @@ AddHandler fcgid-script .pl
<%- end -%>
FcgidMinProcessesPerClass <%= @process %>
FcgidIdleTimeout 30
-
-# These robots were scraping the whole of svnweb in 2024-04, causing severe
-# load, so they are banned. It's not clear whether they obey robots.txt or
-# not (we didn't give them enough of a chance to find out), so we could
-# consider giving them a chance to redeem themselves at some point in the
-# future.
-RewriteEngine on
-RewriteCond %{HTTP_USER_AGENT} ClaudeBot|Amazonbot
-RewriteRule . - [R=403,L]
-
-# Block expensive SVN operations on all common robots ("spider" covers a
-# bunch). "Expensive" is considered to be most operations other than showing a
-# directory or downloading a specific version of a file.
-# Note: eliminating view=log and annotate= doesn't make much difference to the
-# CPU load when robots are hitting the server in real world operation.
-RewriteCond %{QUERY_STRING} pathrev=|r1=
-RewriteCond %{HTTP_USER_AGENT} "Googlebot|GoogleOther|bingbot|Yahoo! Slurp|ClaudeBot|Amazonbot|YandexBot|SemrushBot|Barkrowler|DataForSeoBot|PetalBot|facebookexternalhit|GPTBot|ImagesiftBot|spider|Spider|iPod|Trident|Presto"
-RewriteRule . - [R=403,L]
-
-# Only let expensive operations through when a cookie is set. If no cookie is
-# set, redirect to a page where it will be set using JavaScript and redirect
-# back. This will block requests from user agents that do not support
-# JavaScript, which includes many robots.
-RewriteMap urlescape prg:/usr/local/bin/urlescape
-RewriteCond %{QUERY_STRING} pathrev=|r1=
-RewriteCond %{REQUEST_URI} !/_check
-RewriteCond %{HTTP_COOKIE} !session=([^;]+) [novary]
-RewriteRule . %{REQUEST_SCHEME}://%{SERVER_NAME}:%{SERVER_PORT}/_check?to=%{REQUEST_URI}?${urlescape:%{QUERY_STRING}} [R=302,L]
-
-# Block abusive spiders by IP address who don't identify themselves in the
-# User-Agent: string
-RewriteCond expr "-R '47.76.0.0/14' || -R '47.80.0.0/14' || -R '47.208.0.0/16' || -R '47.238.0.0/16' || -R '8.210.0.0/16' || -R '8.218.0.0/16' || -R '188.239.0.0/18' || -R '166.108.192.0/18' || -R '124.243.160.0/19' || -R '101.46.0.0/20'"
-RewriteRule . - [R=403,L]
-
-ErrorDocument 403 "<html><body>Impolite robots are not allowed</body></html>"
diff --git a/modules/apache/templates/vhost_fcgid_norobot.conf b/modules/apache/templates/vhost_fcgid_norobot.conf
new file mode 100644
index 00000000..0643cac9
--- /dev/null
+++ b/modules/apache/templates/vhost_fcgid_norobot.conf
@@ -0,0 +1,45 @@
+AddHandler fcgid-script .pl
+<%- @script_aliases.keys.sort {|a,b| a.size <=> b.size }.reverse.each do |key| -%>
+ ScriptAlias <%= key %> <%= @script_aliases[key] %>
+<%- end -%>
+FcgidMinProcessesPerClass <%= @process %>
+FcgidIdleTimeout 30
+
+# These robots were scraping the whole of svnweb in 2024-04, causing severe
+# load, so they are banned. It's not clear whether they obey robots.txt or
+# not (we didn't give them enough of a chance to find out), so we could
+# consider giving them a chance to redeem themselves at some point in the
+# future.
+RewriteEngine on
+RewriteCond %{HTTP_USER_AGENT} ClaudeBot|Amazonbot
+RewriteRule . - [R=403,L]
+
+# Block expensive SVN operations on all common robots ("spider" covers a
+# bunch). "Expensive" is considered to be most operations other than showing a
+# directory or downloading a specific version of a file.
+# Note: eliminating view=log and annotate= doesn't make much difference to the
+# CPU load when robots are hitting the server in real world operation.
+#RewriteCond %{QUERY_STRING} pathrev=|r1=
+# Treat anything other than a plain path as "expensive"
+RewriteCond %{QUERY_STRING} .
+RewriteCond %{HTTP_USER_AGENT} "Googlebot|GoogleOther|bingbot|Yahoo! Slurp|ClaudeBot|Amazonbot|YandexBot|SemrushBot|Barkrowler|DataForSeoBot|PetalBot|facebookexternalhit|GPTBot|ImagesiftBot|spider|Spider|iPod|Trident|Presto"
+RewriteRule . - [R=403,L]
+
+# Only let expensive operations through when a cookie is set. If no cookie is
+# set, redirect to a page where it will be set using JavaScript and redirect
+# back. This will block requests from user agents that do not support
+# JavaScript, which includes many robots.
+RewriteMap urlescape prg:/usr/local/bin/urlescape
+#RewriteCond %{QUERY_STRING} pathrev=|r1=
+# Treat anything other than a plain path as "expensive"
+RewriteCond %{QUERY_STRING} .
+RewriteCond %{REQUEST_URI} !/_check
+RewriteCond %{HTTP_COOKIE} !session=([^;]+) [novary]
+RewriteRule . %{REQUEST_SCHEME}://%{SERVER_NAME}:%{SERVER_PORT}/_check?to=%{REQUEST_URI}?${urlescape:%{QUERY_STRING}} [R=302,L]
+
+# Block abusive spiders by IP address who don't identify themselves in the
+# User-Agent: string
+RewriteCond expr "-R '47.76.0.0/14' || -R '47.80.0.0/14' || -R '47.208.0.0/16' || -R '47.238.0.0/16' || -R '8.210.0.0/16' || -R '8.218.0.0/16' || -R '188.239.0.0/18' || -R '166.108.192.0/18' || -R '124.243.160.0/19' || -R '101.46.0.0/20'"
+RewriteRule . - [R=403,L]
+
+ErrorDocument 403 "<html><body>Impolite robots are not allowed</body></html>"
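
[Editor's note] The cookie gate above relies on a prg: RewriteMap, but the helper it points at (/usr/local/bin/urlescape) is not included in this diff. As a rough sketch only, assuming the helper merely needs to percent-encode the query string so it survives being nested inside the ?to= parameter of the /_check redirect, a prg: map can be as small as the following (mod_rewrite feeds it one lookup key per line on stdin and expects one result per line on stdout, unbuffered):

#!/usr/bin/env python3
# Hypothetical stand-in for /usr/local/bin/urlescape -- the real script is not
# part of this change. A prg: RewriteMap receives one key per line on stdin
# and must answer with exactly one line on stdout per key, without buffering,
# or Apache will hang waiting for the reply.
import sys
from urllib.parse import quote

for line in sys.stdin:
    # Encode everything, including '&', '=' and '?', so the original query
    # string comes back intact when setcookieredirect.html decodes the "to"
    # parameter with decodeURIComponent().
    sys.stdout.write(quote(line.rstrip("\n"), safe="") + "\n")
    sys.stdout.flush()
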
diff --git a/modules/bugzilla/manifests/init.pp b/modules/bugzilla/manifests/init.pp
index 5da26c07..e66ddf0e 100755
--- a/modules/bugzilla/manifests/init.pp
+++ b/modules/bugzilla/manifests/init.pp
@@ -178,12 +178,13 @@ class bugzilla {
mode => '0750',
}
- cron { 'collectstats':
- command => "cd $bugzilla_location && ./collectstats.pl",
- user => 'apache',
- hour => 2,
- minute => 30,
- }
+# Improper file permissions makes this fail, and nobody seems to care
+# cron { 'collectstats':
+# command => "cd $bugzilla_location && ./collectstats.pl",
+# user => 'apache',
+# hour => 2,
+# minute => 30,
+# }
cron { 'clean-bug-user-last-visit':
command => "cd $bugzilla_location && ./clean-bug-user-last-visit.pl",
diff --git a/modules/viewvc/files/setcookieredirect.html b/modules/viewvc/files/setcookieredirect.html
index 17322c18..fe98b9dc 100644
--- a/modules/viewvc/files/setcookieredirect.html
+++ b/modules/viewvc/files/setcookieredirect.html
@@ -10,9 +10,9 @@
});
let path = params.to;
// Sanitize redirect path to avoid malicious arbitrary redirects
- if (/^\/[-a-zA-Z0-9~_.?&=/+]*$/.test(decodeURI(path))) {
+ if (/^\/[-a-zA-Z0-9~_.?&=/+]*$/.test(decodeURIComponent(path))) {
const current = new URL(window.location.toLocaleString());
- window.location.href = current.origin + encodeURI(decodeURI(path));
+ window.location.href = encodeURI(current.origin + decodeURIComponent(path));
} else {
window.onload = function() {
document.getElementById('error').innerHTML = 'Error! Bad redirect location!';
diff --git a/modules/viewvc/manifests/init.pp b/modules/viewvc/manifests/init.pp
index e1d336c9..bd676f29 100644
--- a/modules/viewvc/manifests/init.pp
+++ b/modules/viewvc/manifests/init.pp
@@ -62,13 +62,13 @@ class viewvc {
apache::vhost::base { $viewvc::var::hostname:
aliases => $vhost_aliases,
- content => template('apache/vhost_fcgid.conf'),
+ content => template('apache/vhost_fcgid_norobot.conf'),
}
apache::vhost::base { "ssl_${viewvc::var::hostname}":
vhost => $viewvc::var::hostname,
use_ssl => true,
aliases => $vhost_aliases,
- content => template('apache/vhost_fcgid.conf'),
+ content => template('apache/vhost_fcgid_norobot.conf'),
}
}
diff --git a/modules/youri-check/manifests/init.pp b/modules/youri-check/manifests/init.pp
index aef33d17..ebdaa492 100644
--- a/modules/youri-check/manifests/init.pp
+++ b/modules/youri-check/manifests/init.pp
@@ -40,9 +40,9 @@ class youri-check {
$pgsql_server = $base::pgsql_server
$pgsql_user = "youri${version}"
$pgsql_password = extlookup('youri_pgsql','x')
- # We want to alert to packages older than last mass rebuild
- # 1646092800 is 2022-03-01 (get it with "TZ=UTC date -d2022-03-01 +%s")
- $max_days = (time() - 1646092800)/(24*3600)
+ # We want to alert for packages older than the cut-off for latest mass rebuild
+ # 1745539200 is 2025-04-25
+ $max_days = (time() - 1745539200)/(24*3600)
file { "${config}":
ensure => present,
@@ -86,7 +86,7 @@ class youri-check {
user => $pgsql_user,
}
cron { "check_${version}":
- command => "youri-check -c ${config} test",
+ command => "youri-check -c ${config} --parallel test",
hour => $hour,
minute => $minute,
user => $base::user,
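
[Editor's note] The $max_days comment above pins the mass-rebuild cut-off to the epoch value 1745539200 for 2025-04-25; the comment it replaces noted the recipe "TZ=UTC date -d<date> +%s" for regenerating such a value. A quick illustrative check of that constant and of the (time() - cutoff)/(24*3600) arithmetic, not part of the manifest:

#!/usr/bin/env python3
# Illustration only: confirm that 1745539200 is midnight UTC on 2025-04-25
# and mirror the day arithmetic used for $max_days in
# modules/youri-check/manifests/init.pp.
from datetime import datetime, timezone

cutoff = int(datetime(2025, 4, 25, tzinfo=timezone.utc).timestamp())
assert cutoff == 1745539200

now = int(datetime.now(timezone.utc).timestamp())
max_days = (now - cutoff) // (24 * 3600)
print(max_days)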