aboutsummaryrefslogtreecommitdiffstats
path: root/modules/apache/templates/vhost_fcgid.conf
blob: 3aed1ea2e7e1778dd20fb7944f2f14045620d030 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Run Perl (.pl) scripts through mod_fcgid rather than plain CGI.
AddHandler fcgid-script .pl
# Emit one ScriptAlias per configured URL prefix. Keys are sorted by string
# length and reversed so the longest (most specific) prefix is emitted first;
# Apache evaluates ScriptAlias directives in file order, so this ensures the
# most specific alias wins.
<%- @script_aliases.keys.sort {|a,b| a.size <=> b.size }.reverse.each do |key| -%>
        ScriptAlias <%= key %> <%= @script_aliases[key] %>
<%- end -%>
# Minimum number of FCGI processes kept per class (templated from Puppet).
FcgidMinProcessesPerClass <%= @process %>
# Kill idle FCGI processes after 30 seconds to limit resource usage.
FcgidIdleTimeout 30

# These robots were scraping the whole of svnweb in 2024-04, causing severe
# load, so they are banned.  It's not clear whether they obey robots.txt or
# not (we didn't give them enough of a chance to find out), so we could
# consider giving them a chance to redeem themselves at some point in the
# future.
RewriteEngine on
RewriteCond %{HTTP_USER_AGENT} ClaudeBot|Amazonbot
# "-" leaves the URL unchanged; R=403 answers with Forbidden and L stops
# processing further rewrite rules for this request.
RewriteRule . - [R=403,L]

# Block expensive SVN operations on all common robots ("spider" covers a
# bunch). "Expensive" is considered to be most operations other than showing a
# directory or downloading a specific version of a file.
# Note: eliminating view=log and annotate= doesn't make much difference to the
# CPU load when robots are hitting the server in real world operation.
# pathrev=/r1= in the query string mark revision-spanning (expensive) requests.
RewriteCond %{QUERY_STRING} pathrev=|r1=
RewriteCond %{HTTP_USER_AGENT} "Googlebot|GoogleOther|bingbot|Yahoo! Slurp|ClaudeBot|Amazonbot|YandexBot|SemrushBot|Barkrowler|DataForSeoBot|PetalBot|facebookexternalhit|GPTBot|ImagesiftBot|spider|Spider|iPod|Trident|Presto"
RewriteRule . - [R=403,L]

# Only let expensive operations through when a cookie is set. If no cookie is
# set, redirect to a page where it will be set using JavaScript and redirect
# back. This will block requests from user agents that do not support
# JavaScript, which includes many robots.
RewriteCond %{QUERY_STRING} pathrev=|r1=
# Don't loop: the /_check endpoint itself must stay reachable.
RewriteCond %{REQUEST_URI} !/_check
# [novary] keeps Cookie out of the response's Vary header so caches are not
# fragmented by this check.
RewriteCond %{HTTP_COOKIE} !session=([^;]+) [novary]
# Redirect to /_check, passing the original URI and query string in "to" so
# the client can be sent back after the cookie is set.
RewriteRule . %{REQUEST_SCHEME}://%{SERVER_NAME}:%{SERVER_PORT}/_check?to=%{REQUEST_URI}?%{QUERY_STRING} [R=302,L]

# Block abusive spiders by IP address who don't identify themselves in the
# User-Agent: string
# -R tests the client IP against a CIDR subnet; any match forbids the request.
# NOTE(review): ranges presumably belong to the abusive cloud/ISP networks
# observed in logs — confirm before extending.
RewriteCond expr "-R '47.76.0.0/14' || -R '47.80.0.0/14' || -R '47.208.0.0/16' || -R '47.238.0.0/16' || -R '8.210.0.0/16' || -R '8.218.0.0/16' || -R '188.239.0.0/18' || -R '166.108.192.0/18' || -R '124.243.160.0/19' || -R '101.46.0.0/20'"
RewriteRule . - [R=403,L]

# Body served with every 403 produced by the rules above.
ErrorDocument 403 "<html><body>Impolite robots are not allowed</body></html>"