aboutsummaryrefslogtreecommitdiffstats
path: root/modules/apache/templates
diff options
context:
space:
mode:
Diffstat (limited to 'modules/apache/templates')
-rw-r--r--modules/apache/templates/logrotate2
-rw-r--r--modules/apache/templates/urlescape9
-rw-r--r--modules/apache/templates/vhost_fcgid.conf21
3 files changed, 25 insertions, 7 deletions
diff --git a/modules/apache/templates/logrotate b/modules/apache/templates/logrotate
index 4d90e47e..823989eb 100644
--- a/modules/apache/templates/logrotate
+++ b/modules/apache/templates/logrotate
@@ -4,7 +4,7 @@
daily
<% elsif @hostname == 'friteuse' %>
# The virtual disk is very small so keep log sizes down
- rotate 52
+ rotate 26
weekly
<% elsif @hostname == 'sucuk' %>
rotate 52
diff --git a/modules/apache/templates/urlescape b/modules/apache/templates/urlescape
new file mode 100644
index 00000000..8feb7fa4
--- /dev/null
+++ b/modules/apache/templates/urlescape
@@ -0,0 +1,9 @@
+#!/usr/bin/python3 -u
+# URL escape each line read on stdin and print the result on stdout.
+# Used as an Apache "RewriteMap prg:" program: one reply line per input line.
+import sys
+import urllib.parse
+for raw in sys.stdin.buffer:
+    # Quote the raw bytes: decoding first would raise on input that is not
+    # valid UTF-8 and end the program. Flush so no reply is left in a buffer.
+    print(urllib.parse.quote(raw.rstrip(b"\n")), flush=True)
diff --git a/modules/apache/templates/vhost_fcgid.conf b/modules/apache/templates/vhost_fcgid.conf
index 8fc8da5c..f137c866 100644
--- a/modules/apache/templates/vhost_fcgid.conf
+++ b/modules/apache/templates/vhost_fcgid.conf
@@ -17,16 +17,25 @@ RewriteRule . - [R=403,L]
# Block expensive SVN operations on all common robots ("spider" covers a
# bunch). "Expensive" is considered to be most operations other than showing a
# directory or downloading a specific version of a file.
-RewriteCond %{QUERY_STRING} pathrev=|annotate=|view=log|r1=
-RewriteCond %{HTTP_USER_AGENT} "Googlebot|bingbot|Yahoo! Slurp|ClaudeBot|Amazonbot|YandexBot|SemrushBot|Barkrowler|DataForSeoBot|PetalBot|facebookexternalhit|GPTBot|ImagesiftBot|spider|Spider|iPod|Trident|Presto"
+# Note: eliminating view=log and annotate= doesn't make much difference to the
+# CPU load when robots are hitting the server in real world operation.
+RewriteCond %{QUERY_STRING} pathrev=|r1=
+RewriteCond %{HTTP_USER_AGENT} "Googlebot|GoogleOther|bingbot|Yahoo! Slurp|ClaudeBot|Amazonbot|YandexBot|SemrushBot|Barkrowler|DataForSeoBot|PetalBot|facebookexternalhit|GPTBot|ImagesiftBot|spider|Spider|iPod|Trident|Presto"
RewriteRule . - [R=403,L]
+# Only let expensive operations through when a session cookie is set. If no
+# cookie is set, redirect to a page that sets it using JavaScript and then
+# redirects back. This blocks requests from user agents that do not support
+# JavaScript, which includes many robots.
+RewriteMap urlescape prg:/usr/local/bin/urlescape
+RewriteCond %{QUERY_STRING} pathrev=|r1=
+RewriteCond %{REQUEST_URI} !/_check
+RewriteCond %{HTTP_COOKIE} !session=([^;]+) [novary]
+RewriteRule . %{REQUEST_SCHEME}://%{SERVER_NAME}:%{SERVER_PORT}/_check?to=%{REQUEST_URI}?${urlescape:%{QUERY_STRING}} [R=302,L]
+
# Block abusive spiders by IP address who don't identify themselves in the
# User-Agent: string
-RewriteCond expr "-R '47.76.0.0/14' || -R '47.80.0.0/14' || -R '47.208.0.0/16' || -R '47.238.0.0/16' || -R '8.210.0.0/16' || -R '8.218.0.0/16'"
-RewriteRule . - [R=403,L]
-
-RewriteCond %{QUERY_STRING} "sortby=.*view=(log|patch)"
+RewriteCond expr "-R '47.76.0.0/14' || -R '47.80.0.0/14' || -R '47.208.0.0/16' || -R '47.238.0.0/16' || -R '8.210.0.0/16' || -R '8.218.0.0/16' || -R '188.239.0.0/18' || -R '166.108.192.0/18' || -R '124.243.160.0/19' || -R '101.46.0.0/20'"
RewriteRule . - [R=403,L]
ErrorDocument 403 "<html><body>Impolite robots are not allowed</body></html>"