Merge "Enable multinode gate"

This commit is contained in:
Jenkins 2017-06-06 15:11:40 +00:00 committed by Gerrit Code Review
commit 0a18a6966e
11 changed files with 304 additions and 200 deletions

View File

@ -4,10 +4,12 @@
become: True
when: ansible_os_family == 'Debian'
# TODO(inc0): Gates don't seem to have ufw executable, check for it instead of ignore errors
- name: Set firewall default policy
become: True
ufw: state=disabled policy=allow
when: ansible_os_family == 'Debian'
ignore_errors: yes
- name: Check if firewalld is installed
command: rpm -q firewalld

View File

@ -0,0 +1,40 @@
---
# Runs on the primary gate node: discovers the API interface/address and
# writes the kolla configuration tree under /etc/kolla used by the deploy.
- hosts: localhost
  connection: local
  become: true
  tasks:
    - name: Get api_interface name
      set_fact:
        api_interface_address: "{{ lookup('file', '/etc/nodepool/primary_node_private') }}"

    # Find the `ip a` line mentioning the address; the interface name is the
    # last whitespace-separated token of that line.
    - name: Look up the interface carrying the API address
      shell: "ip a | grep {{ api_interface_address }}"
      register: api_interface_name

    - name: Extract the interface name from the ip output
      set_fact:
        api_interface_name: "{{ api_interface_name.stdout_lines[0].split(' ')[-1] }}"

    # A non-empty sub_nodes file means this job runs multinode.
    - name: Determine whether the job is multinode
      set_fact:
        is_multinode: "{{ lookup('file', '/etc/nodepool/sub_nodes') }}"

    - name: Ensure /etc/kolla dir
      file:
        path: /etc/kolla
        state: "directory"

    - name: Setup globals.yml
      template:
        src: "templates/globals-default.j2"
        dest: "/etc/kolla/globals.yml"

    - name: Copy passwords.yml
      copy:
        src: "../etc/kolla/passwords.yml"
        dest: "/etc/kolla/passwords.yml"

    - name: Ensure /etc/kolla/config directory
      file:
        path: /etc/kolla/config/nova
        state: "directory"

    - name: Setup overrides
      template:
        src: "templates/nova-compute-overrides.j2"
        dest: "/etc/kolla/config/nova/nova-compute.conf"

View File

@ -0,0 +1,26 @@
---
# Builds the multinode kolla-ansible inventory on the primary node: takes the
# stock all-in-one inventory and substitutes real node addresses for
# "localhost".
- hosts: localhost
  connection: local
  tasks:
    - name: Get node addresses
      set_fact:
        primary_node_address: "{{ lookup('file', '/etc/nodepool/primary_node_private') }}"
        # sub_nodes_private holds one address per line; may be empty for
        # single-node jobs.
        sub_node_addresses: "{{ lookup('file', '/etc/nodepool/sub_nodes_private').split('\n') }}"

    # Render one inventory host line per node (see templates/inventory.j2).
    - name: Render the inventory host lines
      set_fact:
        node_group: "{{ lookup('template', 'templates/inventory.j2') }}"

    - name: Ensure /tmp/kolla exists
      file:
        path: "/tmp/kolla"
        state: "directory"

    - name: Copy default ansible kolla-ansible inventory
      copy:
        src: ../ansible/inventory/all-in-one
        dest: /tmp/kolla/raw_inventory

    # Every "localhost ..." line in the all-in-one inventory becomes the
    # rendered node group.
    - name: Replace localhost with IPs
      replace:
        path: /tmp/kolla/raw_inventory
        regexp: "localhost.*$"
        replace: "{{ node_group }}"

View File

@ -0,0 +1,88 @@
---
# Collects logs from every gate node. Sub nodes run the diagnostics script
# and push their logs to the primary; the primary aggregates everything under
# the /tmp/logs symlink target.
- hosts: all
  tasks:
    - name: Check node role
      command: "cat /etc/nodepool/role"
      register: node_role

    # Flatten the command result so later conditions compare a plain string.
    - name: Reduce node role to its stdout
      set_fact:
        node_role: "{{ node_role.stdout }}"

    - name: Ensure /tmp/logs dir
      file:
        path: "/tmp/logs"
        state: "directory"
      when:
        - node_role == "sub"

    # On the primary, /tmp/logs is expected to be a symlink prepared by the
    # CI; resolve it so we write to the real target.
    - name: Get /tmp/logs symlink target
      command: "readlink -f /tmp/logs"
      register: logs_target

    - name: Ensure primary node directories
      file:
        path: "{{ logs_target.stdout }}/{{ item }}"
        state: "directory"
        # Quoted: an unquoted 0777 is an octal YAML integer, not a mode string.
        mode: "0777"
      when:
        - node_role == "primary"
      with_items:
        - "subnodes"
        - "docker_logs"
        - "kolla_configs"
        - "system_logs"
        - "kolla"

    - name: Ensure sub node directories
      file:
        path: "/tmp/logs/{{ item }}"
        state: "directory"
        mode: "0777"
      when:
        - node_role == "sub"
      with_items:
        - "docker_logs"
        - "kolla_configs"
        - "system_logs"
        - "kolla"

    # Never abort here: failing containers are reported later, after the logs
    # have been gathered.
    - name: Run diagnostics script
      script: get_logs.sh
      register: get_logs_result
      failed_when: false

    - name: Print get_logs output
      debug:
        msg: "{{ get_logs_result.stdout }}"

    - name: Run dump_info script
      script: ../tools/dump_info.sh

    - name: Download logs from all subnodes
      synchronize:
        src: "/tmp/logs"
        dest: "{{ logs_target.stdout }}/subnodes/{{ ansible_hostname }}"
        mode: "pull"
      when:
        - node_role == "sub"

    - name: Change permission of all log files
      command: "chmod -R 777 {{ logs_target.stdout }}"

    # Surface the deferred diagnostics failure now that logs are collected.
    - name: Fail if get_logs has failed
      fail:
        msg: "{{ get_logs_result.stdout }}"
      when: get_logs_result.rc != 0

    - name: Copy inventory file to logs
      copy:
        src: "/tmp/kolla/raw_inventory"
        dest: "{{ logs_target.stdout }}/ansible/inventory"
      when:
        - node_role == "primary"

    - name: Copy hosts file to logs
      copy:
        src: "/etc/hosts"
        dest: "{{ logs_target.stdout }}/system_logs/hosts"
      when:
        - node_role == "primary"

43
tests/get_logs.sh Normal file
View File

@ -0,0 +1,43 @@
#!/bin/bash
copy_logs() {
# Collect kolla service logs, kolla configuration and system logs into
# /tmp/logs (cp -n: keep already-copied files, -L: dereference symlinks).
cp -rnL /var/lib/docker/volumes/kolla_logs/_data/* /tmp/logs/kolla/
cp -rnL /etc/kolla/* /tmp/logs/kolla_configs/
cp -rvnL /var/log/* /tmp/logs/system_logs/
# systemd hosts expose docker logs via journald; upstart hosts keep a flat file.
if [[ -x "$(command -v journalctl)" ]]; then
journalctl --no-pager -u docker.service > /tmp/logs/system_logs/docker.log
else
cp /var/log/upstart/docker.log /tmp/logs/system_logs/docker.log
fi
}
check_failure() {
# Command failures after this point can be expected
set +o errexit
# Record image and container state for the job log.
docker images
docker ps -a
# All docker container's status are created, restarting, running, removing,
# paused, exited and dead. Containers without running status are treated as
# failure. removing is added in docker 1.13, just ignore it now.
failed_containers=$(docker ps -a --format "{{.Names}}" \
--filter status=created \
--filter status=restarting \
--filter status=paused \
--filter status=exited \
--filter status=dead)
# Save the full log of every non-running container for later inspection.
for failed in ${failed_containers}; do
docker logs --tail all ${failed} > /tmp/logs/docker_logs/${failed}
done
copy_logs
# Any non-running container fails the gate job.
if [[ -n "$failed_containers" ]]; then
exit 1;
fi
}
check_failure

View File

@ -0,0 +1,31 @@
---
kolla_base_distro: "{{ base }}"
kolla_install_type: "{{ type }}"
{% if is_multinode %}
enable_haproxy: "no"
kolla_internal_vip_address: "{{ api_interface_address }}"
{% else %}
kolla_internal_vip_address: "169.254.169.10"
{% endif %}
network_interface: "{{ api_interface_name }}"
docker_restart_policy: "never"
# NOTE(Jeffrey4l): use a different docker namespace name in case it pulls images from hub.docker.io when deploying
docker_namespace: "lokolla"
docker_registry: "{{ api_interface_address }}:4000"
neutron_external_interface: "fake_interface"
enable_horizon: "yes"
enable_heat: "no"
openstack_logging_debug: "True"
openstack_service_workers: "1"
# enable port security in gate until this bug is fixed
# https://bugs.launchpad.net/neutron/+bug/1694420
extension_drivers:
- name: "qos"
enabled: "{{ '{{' }} enable_neutron_qos | bool {{ '}}' }}"
- name: "port_security"
enabled: true
- name: "dns"
enabled: "{{ '{{' }} enable_designate | bool {{ '}}' }}"

View File

@ -0,0 +1,6 @@
{# Rendered in place of "localhost" in the inventory: the primary node is
   deployed over a local connection; each non-empty sub node address is
   reached via SSH as the jenkins user with the nodepool-provided key. #}
{{ primary_node_address }} ansible_become=true ansible_connection=local
{% for addr in sub_node_addresses %}
{% if addr %}
{{ addr }} ansible_user=jenkins ansible_become=true ansible_ssh_private_key_file=/etc/nodepool/id_rsa
{% endif %}
{% endfor %}

View File

@ -0,0 +1,4 @@
[libvirt]
# Plain QEMU emulation — presumably the gate VMs lack nested KVM; confirm
# before changing.
virt_type=qemu
# NOTE(Jeffrey4l): fix the gate in iax-ord nodes for libvirt 2.0.
cpu_mode=none

View File

@ -1,140 +0,0 @@
#!/bin/bash
set -o xtrace
set -o errexit
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
export KOLLA_BASE=$1
export KOLLA_TYPE=$2
export KEEPALIVED_VIRTUAL_ROUTER_ID=$(shuf -i 1-255 -n 1)
function copy_logs {
cp -rnL /var/lib/docker/volumes/kolla_logs/_data/* /tmp/logs/kolla/
cp -rnL /etc/kolla/* /tmp/logs/kolla_configs/
cp -rvnL /var/log/* /tmp/logs/system_logs/
if [[ -x "$(command -v journalctl)" ]]; then
journalctl --no-pager -u docker.service > /tmp/logs/system_logs/docker.log
else
cp /var/log/upstart/docker.log /tmp/logs/system_logs/docker.log
fi
# NOTE(SamYaple): Fix permissions for log extraction in gate
chmod -R 777 /tmp/logs/kolla /tmp/logs/kolla_configs /tmp/logs/system_logs
ara generate html /tmp/logs/playbook_reports/
gzip --recursive --best /tmp/logs/playbook_reports/
}
function sanity_check {
# Wait for service ready
sleep 15
. /etc/kolla/admin-openrc.sh
# TODO(Jeffrey4l): Restart the memcached container to cleanup all cache.
# Remove this after this bug is fixed
# https://bugs.launchpad.net/oslo.cache/+bug/1590779
docker restart memcached
nova --debug service-list
neutron --debug agent-list
tools/init-runonce
nova --debug boot --poll --image $(openstack image list | awk '/cirros/ {print $2}') --nic net-id=$(openstack network list | awk '/demo-net/ {print $2}') --flavor 1 kolla_boot_test
nova --debug list
# If the status is not ACTIVE, print info and exit 1
nova --debug show kolla_boot_test | awk '{buf=buf"\n"$0} $2=="status" && $4!="ACTIVE" {failed="yes"}; END {if (failed=="yes") {print buf; exit 1}}'
}
function check_failure {
# Command failures after this point can be expected
set +o errexit
docker images
docker ps -a
# All docker container's status are created, restarting, running, removing,
# paused, exited and dead. Containers without running status are treated as
# failure. removing is added in docker 1.13, just ignore it now.
failed_containers=$(docker ps -a --format "{{.Names}}" \
--filter status=created \
--filter status=restarting \
--filter status=paused \
--filter status=exited \
--filter status=dead)
for failed in ${failed_containers}; do
docker logs --tail all ${failed}
done
copy_logs
if [[ -n "$failed_containers" ]]; then
exit 1;
fi
}
function write_configs {
mkdir -p /etc/kolla/config
PRIVATE_ADDRESS=$(cat /etc/nodepool/node_private)
PRIVATE_INTERFACE=$(ip -4 --oneline address | awk -v pattern=${PRIVATE_ADDRESS} '$0 ~ pattern {print $2}')
cat << EOF > /etc/kolla/globals.yml
---
kolla_base_distro: "${KOLLA_BASE}"
kolla_install_type: "${KOLLA_TYPE}"
kolla_internal_vip_address: "169.254.169.10"
keepalived_virtual_router_id: "${KEEPALIVED_VIRTUAL_ROUTER_ID}"
docker_restart_policy: "never"
# NOTE(Jeffrey4l): use different a docker namespace name in case it pull image from hub.docker.io when deplying
docker_namespace: "lokolla"
docker_registry: "${PRIVATE_ADDRESS}:4000"
network_interface: "${PRIVATE_INTERFACE}"
neutron_external_interface: "fake_interface"
enable_horizon: "yes"
enable_heat: "no"
openstack_logging_debug: "True"
openstack_service_workers: "1"
# enable port security in gate until this bug is fixed
# https://bugs.launchpad.net/neutron/+bug/1694420
extension_drivers:
- name: "qos"
enabled: "{{ enable_neutron_qos | bool }}"
- name: "port_security"
enabled: true
- name: "dns"
enabled: "{{ enable_designate | bool }}"
EOF
mkdir /etc/kolla/config/nova
cat << EOF > /etc/kolla/config/nova/nova-compute.conf
[libvirt]
virt_type=qemu
# NOTE(Jeffrey4l): fix the gate in iax-ord nodes for libvirt 2.0.
cpu_mode=none
EOF
}
trap check_failure EXIT
write_configs
# Create dummy interface for neutron
ip l a fake_interface type dummy
# Actually do the deployment
tools/kolla-ansible -vvv prechecks
# TODO(jeffrey4l): add pull action when we have a local registry
# service in CI
tools/kolla-ansible -vvv deploy
tools/kolla-ansible -vvv post-deploy
# Test OpenStack Environment
sanity_check
# TODO(jeffrey4l): make some configure file change and
# trigger a real reconfigure
tools/kolla-ansible -vvv reconfigure
# TODO(jeffrey4l): need run a real upgrade
tools/kolla-ansible -vvv upgrade
# run prechecks again
tools/kolla-ansible -vvv prechecks

View File

@ -3,25 +3,10 @@
become: true
tasks:
- name: Setup /etc/hosts
copy:
src: /etc/hosts
dest: /etc/hosts
- name: Ensure /etc/hostname is valid for SELinux
command: restorecon -v /etc/hostname
when: ansible_os_family == 'RedHat'
- name: Assign hostname
hostname:
name: "{{ inventory_hostname }}"
- name: Copy setup script
copy:
src: setup_{{ ansible_os_family }}.sh
dest: /tmp/setup.sh
mode: 0755
- name: Install wget package
package: name=wget
@ -34,11 +19,3 @@
state: directory
path: /tmp/{{ inventory_hostname }}
become: false
- name: Run node setup
shell: /tmp/setup.sh
- name: Changing permissions of Docker socket to 666
file:
path: /run/docker.sock
mode: 0666

View File

@ -33,10 +33,8 @@ EOF
}
function setup_config {
sudo cp -r etc/kolla /etc/
# Generate passwords
sudo tools/generate_passwords.py
sudo mkdir /etc/kolla
sudo chmod -R 777 /etc/kolla
# Use Infra provided pypi.
# Wheel package mirror may be not compatible. So do not enable it.
PIP_CONF=$(mktemp)
@ -102,39 +100,14 @@ function setup_workaround_broken_nodepool {
}
function setup_ssh {
# Generate a new keypair that Ansible will use
ssh-keygen -f /home/jenkins/.ssh/kolla -N ''
cat /home/jenkins/.ssh/kolla.pub >> /home/jenkins/.ssh/authorized_keys
# Push the public key around to all of the nodes
for ip in $(cat /etc/nodepool/sub_nodes_private); do
scp /home/jenkins/.ssh/kolla.pub ${ip}:/home/jenkins/.ssh/authorized_keys
# TODO(SamYaple): Remove this root key pushing once Kolla doesn't
# require root anymore.
ssh ${ip} -i /home/jenkins/.ssh/kolla 'sudo mkdir -p /root/.ssh; sudo cp /home/jenkins/.ssh/* /root/.ssh/'
done
# From now on use the new IdentityFile for connecting to other hosts
echo "IdentityFile /home/jenkins/.ssh/kolla" >> /home/jenkins/.ssh/config
chmod 600 /home/jenkins/.ssh/config
sudo chown jenkins /etc/nodepool/id_rsa
sudo chmod 600 /etc/nodepool/id_rsa
}
function setup_inventory {
local counter=0
echo -e "127.0.0.1\tlocalhost" > /tmp/hosts
for ip in $(cat /etc/nodepool/{node_private,sub_nodes_private}); do
: $((counter++))
# FIXME(jeffrey4l): do not set two hostnames on one line. This is a
# workaround for rabbitmq failing to deploy on CentOS in the CI
# gate. The ideal fix would set the hostname in the setup_gate.sh script,
# but that does not work as expected for unknown reasons.
ssh-keyscan "${ip}" >> ~/.ssh/known_hosts
echo -e "${ip}\tnode${counter}" >> /tmp/hosts
echo -e "${ip}\t$(ssh ${ip} hostname)" >> /tmp/hosts
echo "node${counter}" >> ${RAW_INVENTORY}
done
ansible-playbook tests/ansible_generate_inventory.yml
sudo chown root: /tmp/hosts
sudo chmod 644 /tmp/hosts
sudo mv /tmp/hosts /etc/hosts
@ -150,7 +123,6 @@ function setup_ansible {
setup_inventory
sudo mkdir /etc/ansible
sudo tee /etc/ansible/ansible.cfg<<EOF
[defaults]
@ -177,7 +149,7 @@ function setup_logging {
}
function prepare_images {
docker run -d -p 4000:5000 --restart=always -v /tmp/kolla_registry/:/var/lib/registry --name registry registry:2
sudo docker run -d -p 4000:5000 --restart=always -v /tmp/kolla_registry/:/var/lib/registry --name registry registry:2
# NOTE(Jeffrey4l): Zuul adds all changes depend on to ZUUL_CHANGES
# variable. if find "openstack/kolla:" string, it means this patch depends
@ -196,16 +168,71 @@ function prepare_images {
fi
}
function sanity_check {
# Wait for service ready
sleep 15
. /etc/kolla/admin-openrc.sh
# TODO(Jeffrey4l): Restart the memcached container to cleanup all cache.
# Remove this after this bug is fixed
# https://bugs.launchpad.net/oslo.cache/+bug/1590779
sudo docker restart memcached
nova --debug service-list
neutron --debug agent-list
tools/init-runonce
nova --debug boot --poll --image $(openstack image list | awk '/cirros/ {print $2}') --nic net-id=$(openstack network list | awk '/demo-net/ {print $2}') --flavor 1 kolla_boot_test
nova --debug list
# If the status is not ACTIVE, print info and exit 1
nova --debug show kolla_boot_test | awk '{buf=buf"\n"$0} $2=="status" && $4!="ACTIVE" {failed="yes"}; END {if (failed=="yes") {print buf; exit 1}}'
}
function get_logs {
ansible-playbook -i ${RAW_INVENTORY} tests/ansible_get_logs.yml > /tmp/logs/ansible/get-logs
}
setup_logging
tools/dump_info.sh
clone_repos
setup_workaround_broken_nodepool
setup_ssh
setup_ansible
setup_node
setup_config
setup_node
ansible-playbook -e type=$INSTALL_TYPE -e base=$BASE_DISTRO tests/ansible_generate_config.yml > /tmp/logs/ansible/generate_config
tools/kolla-ansible -i ${RAW_INVENTORY} bootstrap-servers > /tmp/logs/ansible/bootstrap-servers
sudo tools/generate_passwords.py
prepare_images
sudo tools/deploy_aio.sh "${BASE_DISTRO}" "${INSTALL_TYPE}"
trap get_logs EXIT
tools/dump_info.sh
# Create dummy interface for neutron
ansible -m shell -i ${RAW_INVENTORY} -a "ip l a fake_interface type dummy" all
#TODO(inc0): Post-deploy complains that /etc/kolla is not writable. Probably we need to include become there
sudo chmod -R 777 /etc/kolla
# Actually do the deployment
tools/kolla-ansible -i ${RAW_INVENTORY} -vvv prechecks > /tmp/logs/ansible/prechecks1
# TODO(jeffrey4l): add pull action when we have a local registry
# service in CI
tools/kolla-ansible -i ${RAW_INVENTORY} -vvv deploy > /tmp/logs/ansible/deploy
tools/kolla-ansible -i ${RAW_INVENTORY} -vvv post-deploy > /tmp/logs/ansible/post-deploy
# Test OpenStack Environment
# TODO: use kolla-ansible check when it's ready
sanity_check
# TODO(jeffrey4l): make some configure file change and
# trigger a real reconfigure
tools/kolla-ansible -i ${RAW_INVENTORY} -vvv reconfigure > /tmp/logs/ansible/post-deploy
# TODO(jeffrey4l): need run a real upgrade
tools/kolla-ansible -i ${RAW_INVENTORY} -vvv upgrade > /tmp/logs/ansible/upgrade
# run prechecks again
tools/kolla-ansible -i ${RAW_INVENTORY} -vvv prechecks > /tmp/logs/ansible/prechecks2
get_logs
ara generate html /tmp/logs/playbook_reports/
gzip --recursive --best /tmp/logs/playbook_reports/