aboutsummaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
Diffstat (limited to 'modules')
-rw-r--r--modules/apache/manifests/mod/fcgid.pp7
-rw-r--r--modules/apache/templates/urlescape9
-rw-r--r--modules/apache/templates/vhost_fcgid.conf21
-rwxr-xr-xmodules/bugzilla/manifests/init.pp13
-rw-r--r--modules/buildsystem/manifests/iurt/config.pp6
-rw-r--r--modules/viewvc/files/setcookieredirect.html28
-rw-r--r--modules/viewvc/manifests/init.pp9
-rw-r--r--modules/youri-check/manifests/init.pp6
8 files changed, 81 insertions, 18 deletions
diff --git a/modules/apache/manifests/mod/fcgid.pp b/modules/apache/manifests/mod/fcgid.pp
index 6c815681..b8186a64 100644
--- a/modules/apache/manifests/mod/fcgid.pp
+++ b/modules/apache/manifests/mod/fcgid.pp
@@ -1,4 +1,11 @@
class apache::mod::fcgid {
    include apache::base
    package { 'apache-mod_fcgid': }
+    # Helper script for the vhost RewriteMap; apache is notified when it changes
+    file { 'urlescape':
+        path    => '/usr/local/bin/urlescape',
+        content => template('apache/urlescape'),
+        mode    => '0755',
+        notify  => Service['apache'],
+    }
}
diff --git a/modules/apache/templates/urlescape b/modules/apache/templates/urlescape
new file mode 100644
index 00000000..8feb7fa4
--- /dev/null
+++ b/modules/apache/templates/urlescape
@@ -0,0 +1,9 @@
+#!/usr/bin/python3 -u
+# URL escape each path given on stdin
+import sys
+import urllib.parse
+while True:
+ l = sys.stdin.readline()
+ if not l:
+ break
+ print(urllib.parse.quote(l.rstrip("\n")))
diff --git a/modules/apache/templates/vhost_fcgid.conf b/modules/apache/templates/vhost_fcgid.conf
index 8fc8da5c..f137c866 100644
--- a/modules/apache/templates/vhost_fcgid.conf
+++ b/modules/apache/templates/vhost_fcgid.conf
@@ -17,16 +17,25 @@ RewriteRule . - [R=403,L]
# Block expensive SVN operations on all common robots ("spider" covers a
# bunch). "Expensive" is considered to be most operations other than showing a
# directory or downloading a specific version of a file.
-RewriteCond %{QUERY_STRING} pathrev=|annotate=|view=log|r1=
-RewriteCond %{HTTP_USER_AGENT} "Googlebot|bingbot|Yahoo! Slurp|ClaudeBot|Amazonbot|YandexBot|SemrushBot|Barkrowler|DataForSeoBot|PetalBot|facebookexternalhit|GPTBot|ImagesiftBot|spider|Spider|iPod|Trident|Presto"
+# Note: eliminating view=log and annotate= doesn't make much difference to the
+# CPU load when robots are hitting the server in real world operation.
+RewriteCond %{QUERY_STRING} pathrev=|r1=
+RewriteCond %{HTTP_USER_AGENT} "Googlebot|GoogleOther|bingbot|Yahoo! Slurp|ClaudeBot|Amazonbot|YandexBot|SemrushBot|Barkrowler|DataForSeoBot|PetalBot|facebookexternalhit|GPTBot|ImagesiftBot|spider|Spider|iPod|Trident|Presto"
RewriteRule . - [R=403,L]
+# Only let expensive operations through when a session cookie is set. Without
+# one, redirect to /_check, which sets it via JavaScript and redirects back;
+# this stops user agents without JavaScript, which includes many robots. The
+# /_check test is anchored so other paths containing "/_check" are not exempt.
+RewriteMap urlescape prg:/usr/local/bin/urlescape
+RewriteCond %{QUERY_STRING} pathrev=|r1=
+RewriteCond %{REQUEST_URI} !^/_check$
+RewriteCond %{HTTP_COOKIE} !session=([^;]+) [novary]
+RewriteRule . %{REQUEST_SCHEME}://%{SERVER_NAME}:%{SERVER_PORT}/_check?to=%{REQUEST_URI}?${urlescape:%{QUERY_STRING}} [R=302,L]
+
# Block abusive spiders by IP address who don't identify themselves in the
# User-Agent: string
-RewriteCond expr "-R '47.76.0.0/14' || -R '47.80.0.0/14' || -R '47.208.0.0/16' || -R '47.238.0.0/16' || -R '8.210.0.0/16' || -R '8.218.0.0/16'"
-RewriteRule . - [R=403,L]
-
-RewriteCond %{QUERY_STRING} "sortby=.*view=(log|patch)"
+RewriteCond expr "-R '47.76.0.0/14' || -R '47.80.0.0/14' || -R '47.208.0.0/16' || -R '47.238.0.0/16' || -R '8.210.0.0/16' || -R '8.218.0.0/16' || -R '188.239.0.0/18' || -R '166.108.192.0/18' || -R '124.243.160.0/19' || -R '101.46.0.0/20'"
RewriteRule . - [R=403,L]
ErrorDocument 403 "<html><body>Impolite robots are not allowed</body></html>"
diff --git a/modules/bugzilla/manifests/init.pp b/modules/bugzilla/manifests/init.pp
index 5da26c07..e66ddf0e 100755
--- a/modules/bugzilla/manifests/init.pp
+++ b/modules/bugzilla/manifests/init.pp
@@ -178,12 +178,13 @@ class bugzilla {
mode => '0750',
}
- cron { 'collectstats':
- command => "cd $bugzilla_location && ./collectstats.pl",
- user => 'apache',
- hour => 2,
- minute => 30,
- }
+# Improper file permissions make this fail, and nobody seems to care
+# cron { 'collectstats':
+# command => "cd $bugzilla_location && ./collectstats.pl",
+# user => 'apache',
+# hour => 2,
+# minute => 30,
+# }
cron { 'clean-bug-user-last-visit':
command => "cd $bugzilla_location && ./clean-bug-user-last-visit.pl",
diff --git a/modules/buildsystem/manifests/iurt/config.pp b/modules/buildsystem/manifests/iurt/config.pp
index be440e39..b8be373e 100644
--- a/modules/buildsystem/manifests/iurt/config.pp
+++ b/modules/buildsystem/manifests/iurt/config.pp
@@ -20,9 +20,9 @@ define buildsystem::iurt::config() {
'java-latest-openjdk' => 172800,
'kernel' => 115200,
'libreoffice' => 432000,
- 'llvm' => 86400,
- 'llvm17-suite' => 86400,
- 'llvm19-suite' => 86400,
+ 'llvm' => 115200,
+ 'llvm17-suite' => 115200,
+ 'llvm19-suite' => 115200,
'openfoam' => 115200,
'paraview' => 115200,
'qgis' => 57600,
diff --git a/modules/viewvc/files/setcookieredirect.html b/modules/viewvc/files/setcookieredirect.html
new file mode 100644
index 00000000..fe98b9dc
--- /dev/null
+++ b/modules/viewvc/files/setcookieredirect.html
@@ -0,0 +1,28 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <title>User check</title>
+    <script type="text/javascript" defer>
+      // Set the marker cookie (valid for 24 hours) before attempting the redirect.
+      const randomValue = "6436"; // Chosen by fair dice roll. Guaranteed to be random.
+      document.cookie = `session=${randomValue}; path=/; expires=${new Date(Date.now() + 24*3600*1000).toUTCString()}`;
+      // get() decodes "to" once; decodeURIComponent() below decodes it a second time.
+      let path = new URLSearchParams(window.location.search).get('to');
+      // A malformed escape makes decodeURIComponent() throw; treat that as a bad target.
+      try { path = decodeURIComponent(path); } catch (e) { path = null; }
+      // Sanitize redirect path to avoid malicious arbitrary redirects
+      if (path !== null && /^\/[-a-zA-Z0-9~_.?&=/+]*$/.test(path)) {
+        window.location.href = encodeURI(window.location.origin + path);
+      } else {
+        window.onload = function() {
+          document.getElementById('error').innerHTML = 'Error! Bad redirect location!';
+        }
+      }
+    </script>
+  </head>
+  <body>
+    Redirecting back...
+    <br>
+    <p id="error"><!-- space for error message --></p>
+  </body>
+</html>
diff --git a/modules/viewvc/manifests/init.pp b/modules/viewvc/manifests/init.pp
index 99acec90..e1d336c9 100644
--- a/modules/viewvc/manifests/init.pp
+++ b/modules/viewvc/manifests/init.pp
@@ -40,9 +40,18 @@ class viewvc {
source => 'puppet:///modules/viewvc/robots.txt',
}
+ file { "$viewvc_docroot/setcookieredirect.html":
+ ensure => present,
+ mode => '0644',
+ owner => root,
+ group => root,
+ source => 'puppet:///modules/viewvc/setcookieredirect.html',
+ }
+
$vhost_aliases = {
'/viewvc' => $viewvc_docroot,
'/robots.txt' => $robotsfile,
+ '/_check' => "$viewvc_docroot/setcookieredirect.html",
}
$script_aliases = {
diff --git a/modules/youri-check/manifests/init.pp b/modules/youri-check/manifests/init.pp
index aef33d17..1a992113 100644
--- a/modules/youri-check/manifests/init.pp
+++ b/modules/youri-check/manifests/init.pp
@@ -40,9 +40,9 @@ class youri-check {
$pgsql_server = $base::pgsql_server
$pgsql_user = "youri${version}"
$pgsql_password = extlookup('youri_pgsql','x')
- # We want to alert to packages older than last mass rebuild
- # 1646092800 is 2022-03-01 (get it with "TZ=UTC date -d2022-03-01 +%s")
- $max_days = (time() - 1646092800)/(24*3600)
+ # We want to alert for packages older than the cut-off for the latest mass rebuild
+ # 1745605215 is 2025-04-25 18:20:15 UTC (check with "date -u -d @1745605215")
+ $max_days = (time() - 1745605215)/(24*3600)
file { "${config}":
ensure => present,