
* Remove a stray trailing ' from the key
* Update the key URL to use https
* Fix the log path to scrape

Change-Id: I580b63f08147494a937d44f4f6637947221c8937
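As a side note on the host-key bullets: the known_hosts value embedded in the playbook below can be cross-checked against the live host with ssh-keyscan, roughly as sketched here (a sketch, not part of the change; compare the result out of band before trusting it, since ssh-keyscan only reports what the server currently presents):

    # Scan the ECDSA host key presented by static.opendev.org
    ssh-keyscan -t ecdsa static.opendev.org 2>/dev/null
    # Expected shape of the output (one line per key type):
    #   static.opendev.org ecdsa-sha2-nistp256 AAAA...
    # The comma-separated hostname/IP list in the playbook's known_hosts
    # entry is assembled separately; ssh-keyscan prints only the name scanned.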
43 lines
1.6 KiB
YAML
- hosts: localhost
  tasks:
    - name: Add static.opendev.org to inventory
      add_host:
        name: static.opendev.org
        ansible_connection: ssh
        ansible_host: static.opendev.org
        ansible_port: 22
        ansible_user: zuul

    - name: Add static.opendev.org host key
      known_hosts:
        name: static.opendev.org
        key: static.opendev.org,23.253.245.150,2001:4800:7818:101:be76:4eff:fe04:7c28 ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBMu3PnnkNhPS2d5Z2uPju3Qqcbbc0lwHA1j9MgHlLnbK3bx1O2Kfez6RJUGl2i6nshdzkKwPBvN2vehQKiw1oSk=

# NOTE(ianw): 2020-02-25 just for initial testing run this for one log
# in a dumb way. We can scrape a few more sites. Overall, we expect
# this to be replaced with a better analysis tool, see
# https://review.opendev.org/709236
- hosts: static.opendev.org
  tasks:
    - name: Run 404 scraping script
      become: yes
      shell: |
        SOURCE_FILE=/var/log/apache2/docs.openstack.org_access.log
        INTERMEDIATE_FILE=$(mktemp)

        # Get just the lines with 404s in them
        grep ' 404 ' $SOURCE_FILE | sed -n -e 's/.*"GET \(\/.*\) HTTP\/1\.." 404 .*/\1/p' > $INTERMEDIATE_FILE

        if [ -f "$SOURCE_FILE.1" ] ; then
            # We get roughly the last day's worth of logs by looking at
            # the last two log files.
            grep ' 404 ' $SOURCE_FILE.1 | sed -n -e 's/.*"GET \(\/.*\) HTTP\/1\.." 404 .*/\1/p' >> $INTERMEDIATE_FILE
        fi

        # Process those 404s to count them and return sorted by count
        sort $INTERMEDIATE_FILE | uniq -c | sort -rn | grep '\(html\|\/$\)'

        rm ${INTERMEDIATE_FILE}
      args:
        executable: /bin/bash
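For reference, a rough sketch of what the scraping pipeline does when exercised by hand. The access-log line and the counts shown are made up for illustration; the sed and sort/uniq expressions are the ones from the playbook above.

    # Illustrative Apache access-log entry with a 404 status (not real data):
    LOG_LINE='203.0.113.7 - - [25/Feb/2020:10:00:00 +0000] "GET /mitaka/install-guide/ HTTP/1.1" 404 1234 "-" "Mozilla/5.0"'

    # The sed expression keeps only the request path of 404 responses:
    echo "$LOG_LINE" | sed -n -e 's/.*"GET \(\/.*\) HTTP\/1\.." 404 .*/\1/p'
    # -> /mitaka/install-guide/

    # After all matching paths are collected, the final pipeline counts
    # duplicates, sorts by frequency, and keeps only HTML pages or
    # directory indexes:
    #   sort $INTERMEDIATE_FILE | uniq -c | sort -rn | grep '\(html\|\/$\)'
    # producing output along the lines of:
    #      42 /mitaka/install-guide/
    #       7 /latest/missing-page.html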