From 8d0d6155ed71a1ec0b3f1964190b845a487a4ca0 Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Wed, 1 Jul 2020 12:54:00 +1000 Subject: [PATCH] gitea: crawler UA reject rules As described inline, this crawler is causing us problems as it hits the backends indiscriminately. Block it via the known UA strings, which luckily are old so should not cause real client issues. Change-Id: I0d78a8b625b69f600e00e8b3ea64576e0fdb84d9 --- .../roles/gitea/templates/gitea.vhost.j2 | 45 +++++++++++++++++++ testinfra/test_gitea.py | 7 +++ 2 files changed, 52 insertions(+) diff --git a/playbooks/roles/gitea/templates/gitea.vhost.j2 b/playbooks/roles/gitea/templates/gitea.vhost.j2 index b687f9e6ac..f700f71069 100644 --- a/playbooks/roles/gitea/templates/gitea.vhost.j2 +++ b/playbooks/roles/gitea/templates/gitea.vhost.j2 @@ -24,6 +24,51 @@ Listen 3081 SSLProxyEngine on + RewriteEngine On + + # This interesting list is taken from + # + # https://github.com/mythsman/weiboCrawler/blob/master/opener.py + # + # which appears to be a crawler for a site "weibo" which is crawling + # gitea relentlessly. It seems to rotate though these to avoid + # detection. We are seeing these very specific user-agents appear + # suggesting this code has been repurposed or has otherwise gone mad + # and started going through opendev.org + + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1" [OR] + RewriteCond %{HTTP_USER_AGENT} "=MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)" [OR] + RewriteCond %{HTTP_USER_AGENT} "=UCWEB7.0.2.37/28/999" [OR] + RewriteCond %{HTTP_USER_AGENT} "=NOKIA5700/ UCWEB7.0.2.37/28/999" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Openwave/ UCWEB7.0.2.37/28/999" [OR] + RewriteCond %{HTTP_USER_AGENT} "=Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999" + RewriteRule . - [R=403,L] + ProxyPass / https://localhost:3000/ retry=0 ProxyPassReverse / https://localhost:3000/ diff --git a/testinfra/test_gitea.py b/testinfra/test_gitea.py index e56f1f6b32..bcebbd4e5e 100644 --- a/testinfra/test_gitea.py +++ b/testinfra/test_gitea.py @@ -47,3 +47,10 @@ def test_proxy(host): '--resolve gitea99.opendev.org:3081:127.0.0.1 ' 'https://gitea99.opendev.org:3081/') assert 'Git with a cup of tea' in cmd.stdout + +def test_proxy_ua_blacklist(host): + cmd = host.run('curl --insecure -A ' + '" Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)" ' + '--resolve gitea99.opendev.org:3081:127.0.0.1 ' + 'https://gitea99.opendev.org:3081/') + assert '403 Forbidden' in cmd.stdout