Add robots.txt to our list servers
We've noticed that our uwsgi queues are filling up and a lot of requests are being made to robots.txt which ends up 500/503 erroring. Add a robots.txt file which allows crawling of our lists and archives with a delay value in hopes this will cause bots to cache results and not fill up the queue with repetitive requests. Change-Id: I660d8d43f6b2d96663212d93ec48e67d86e9e761
This commit is contained in:
parent
481f128257
commit
c499b57e16
7
playbooks/roles/mailman3/files/robots.txt
Normal file
7
playbooks/roles/mailman3/files/robots.txt
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
User-agent: *
|
||||||
|
|
||||||
|
Disallow: /accounts/*
|
||||||
|
Allow: /archives/*
|
||||||
|
Allow: /mailman3/lists/*
|
||||||
|
|
||||||
|
Crawl-delay: 2
|
@ -141,6 +141,22 @@
|
|||||||
shell:
|
shell:
|
||||||
cmd: docker image prune -f
|
cmd: docker image prune -f
|
||||||
|
|
||||||
|
- name: Create robots.txt location dir
|
||||||
|
file:
|
||||||
|
path: /var/www/robots
|
||||||
|
state: directory
|
||||||
|
owner: root
|
||||||
|
group: root
|
||||||
|
mode: '0755'
|
||||||
|
|
||||||
|
- name: Copy the robots.txt
|
||||||
|
copy:
|
||||||
|
src: robots.txt
|
||||||
|
dest: /var/www/robots/robots.txt
|
||||||
|
owner: root
|
||||||
|
group: root
|
||||||
|
mode: '0644'
|
||||||
|
|
||||||
- name: Install apache2
|
- name: Install apache2
|
||||||
package:
|
package:
|
||||||
name:
|
name:
|
||||||
|
@ -46,6 +46,7 @@
|
|||||||
</Location>
|
</Location>
|
||||||
|
|
||||||
RewriteEngine On
|
RewriteEngine On
|
||||||
|
RewriteRule ^/robots.txt$ /var/www/robots/robots.txt [L]
|
||||||
RewriteRule "/pipermail/(.*)" "/var/lib/mailman/web-data/mm2archives/%{HTTP_HOST}/public/$1"
|
RewriteRule "/pipermail/(.*)" "/var/lib/mailman/web-data/mm2archives/%{HTTP_HOST}/public/$1"
|
||||||
RewriteRule "/cgi-bin/mailman/listinfo/(.*)" "https://%{HTTP_HOST}/mailman3/lists/$1.%{HTTP_HOST}/"
|
RewriteRule "/cgi-bin/mailman/listinfo/(.*)" "https://%{HTTP_HOST}/mailman3/lists/$1.%{HTTP_HOST}/"
|
||||||
RewriteRule "/cgi-bin/mailman/listinfo" "https://%{HTTP_HOST}/mailman3/lists/"
|
RewriteRule "/cgi-bin/mailman/listinfo" "https://%{HTTP_HOST}/mailman3/lists/"
|
||||||
@ -66,4 +67,8 @@
|
|||||||
Allow from all
|
Allow from all
|
||||||
Require all granted
|
Require all granted
|
||||||
</Directory>
|
</Directory>
|
||||||
|
|
||||||
|
<Directory "/var/www/robots">
|
||||||
|
Require all granted
|
||||||
|
</Directory>
|
||||||
</VirtualHost>
|
</VirtualHost>
|
||||||
|
@ -36,6 +36,13 @@ def test_apache2_listening(host):
|
|||||||
apache2_https = host.socket("tcp://0.0.0.0:443")
|
apache2_https = host.socket("tcp://0.0.0.0:443")
|
||||||
assert apache2_https.is_listening
|
assert apache2_https.is_listening
|
||||||
|
|
||||||
|
def test_robots(host):
|
||||||
|
cmd = host.run('curl --insecure '
|
||||||
|
'--resolve lists.opendev.org:443:127.0.0.1 '
|
||||||
|
'https://lists.opendev.org/robots.txt')
|
||||||
|
assert 'Disallow: /accounts/*' in cmd.stdout
|
||||||
|
assert 'Allow: /archives/*' in cmd.stdout
|
||||||
|
|
||||||
def test_mailman3_screenshots(host):
|
def test_mailman3_screenshots(host):
|
||||||
shots = (
|
shots = (
|
||||||
("https://lists.opendev.org:443", None, "mm3-opendev-main.png"),
|
("https://lists.opendev.org:443", None, "mm3-opendev-main.png"),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user