Retire repository

Retire repository following the process on retiring an OpenStack repository:
http://docs.openstack.org/infra/manual/drivers.html#remove-project-content

This removes *all* content and just leaves a single README.rst that explains
how to get the content

Depends-On: I9f4e21b44c717d11511fea48db54a52103e294b1
Change-Id: I4281697640489a779a3ef82b23174262a0baa3fc

parent c41067857e
commit 1510c36ac5
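The linked retirement process amounts to removing every tracked file and committing a single pointer README in its place. A rough sketch of what that looks like locally (command names only; the real change still goes through Gerrit with the Depends-On shown above):

.. code-block:: console

   $ git rm -r -q .
   $ $EDITOR README.rst    # write the retirement notice shown below
   $ git add README.rst
   $ git commit            # commit message carries the Change-Id footer
   $ git review            # submit the change to Gerrit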
.gitignore (vendored) | 24 lines removed
@@ -1,24 +0,0 @@
.DS_Store
*.xpr

# Packages
.venv
*.egg
*.egg-info

# Build directories
target/
publish-docs/
build/
/build-*.log.gz

# Testenvironment
.tox/

# Transifex Client Setting
.tx

# Editors
*~
.*.swp
.bak
.gitreview | 4 lines removed

@@ -1,4 +0,0 @@
[gerrit]
host=review.openstack.org
port=29418
project=openstack/ha-guide.git
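The file above is what the ``git-review`` tool reads to find the Gerrit server for this repository. A minimal sketch of the usual workflow against it, using only standard ``git-review`` commands:

.. code-block:: console

   $ git review -s              # set up the gerrit remote from .gitreview
   $ git checkout -b my-change
   $ git commit                 # commit with a Change-Id footer
   $ git review                 # push the change to review.openstack.org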
README.rst | 72 lines changed
@@ -1,65 +1,13 @@

Removed:

OpenStack High Availability Guide
+++++++++++++++++++++++++++++++++

This repository contains the OpenStack High Availability Guide.

For more details, see the `OpenStack Documentation wiki page
<http://wiki.openstack.org/Documentation>`_.

Building
========

The root directory of the *OpenStack High Availability Guide*
is ``doc/ha-guide``.

To build the guide, run ``tox -e docs``.

Testing of changes and building of the manual
=============================================

Install the python tox package and run ``tox`` from the top-level
directory to use the same tests that are done as part of our Jenkins
gating jobs.

If you like to run individual tests, run:

* ``tox -e checkniceness`` - to run the niceness tests
* ``tox -e checkbuild`` - to actually build the manual

tox will use the openstack-doc-tools package for execution of these
tests.

Contributing
============

Our community welcomes all people interested in open source cloud
computing, and encourages you to join the `OpenStack Foundation
<http://www.openstack.org/join>`_.

The best way to get involved with the community is to talk with others
online or at a meet up and offer contributions through our processes,
the `OpenStack wiki <http://wiki.openstack.org>`_, blogs, or on IRC at
``#openstack`` on ``irc.freenode.net``.

We welcome all types of contributions, from blueprint designs to
documentation to testing to deployment scripts.

If you would like to contribute to the documents, please see the
`OpenStack Documentation Contributor Guide
<http://docs.openstack.org/contributor-guide/>`_.

Bugs
====

Bugs should be filed on Launchpad, not GitHub:

https://bugs.launchpad.net/openstack-manuals

Installing
==========

Refer to http://docs.openstack.org to see where these documents are published
and to learn more about the OpenStack project.

Added:

This project is no longer maintained.

The contents of this repository are still available in the Git
source code management system. To see the contents of this
repository before it reached its end of life, please check out the
previous commit with "git checkout HEAD^1".

The content has been merged into the openstack-manuals repository at
http://git.openstack.org/cgit/openstack/openstack-manuals/

For any further questions, please email
openstack-docs@lists.openstack.org or join #openstack-doc on
Freenode.
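For anyone who later needs the retired sources, the new README's instruction amounts to stepping back one commit after cloning; a minimal sketch, assuming the clone URL follows the usual git.openstack.org layout:

.. code-block:: console

   $ git clone https://git.openstack.org/openstack/ha-guide
   $ cd ha-guide
   $ git checkout HEAD^1    # the tree as it was before this retirement commit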
@@ -1,2 +0,0 @@
[DEFAULT]
repo_name = ha-guide
@@ -1,31 +0,0 @@
# Configuration for translation setup.

# directories to be set up
declare -A DIRECTORIES=(
)

# books to be built
declare -A BOOKS=(
    ["ja"]="ha-guide"
)

# draft books
declare -A DRAFTS=(
    ["ja"]="ha-guide"
)

# Where does the top-level pom live?
# Set to empty to not copy it.
POM_FILE=""

# Location of doc dir
DOC_DIR="doc/"

# Books with special handling
# Values need to match content in project-config/jenkins/scripts/common_translation_update.sh
declare -A SPECIAL_BOOKS
SPECIAL_BOOKS=(
    ["ha-guide"]="RST"
    # These are translated in openstack-manuals
    ["common"]="skip"
)
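This file is plain Bash that the translation jobs source; the associative arrays are then iterated to decide what gets built per language. A minimal sketch of how such a consumer might read it (the file name ``doc-tools-check-languages.conf`` and the loop below are illustrative assumptions, not the actual job script):

.. code-block:: bash

   #!/bin/bash
   # Source the configuration and list which books would be built per language.
   source doc-tools-check-languages.conf

   for lang in "${!BOOKS[@]}"; do
       for book in ${BOOKS[$lang]}; do
           echo "Would build book '$book' for language '$lang'"
       done
   done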
@@ -1,7 +0,0 @@
Important note about this directory
===================================

Because this directory is synced from openstack-manuals, make any changes in
openstack-manuals/doc/common. After changes to the synced files merge to
openstack-manuals/doc/common, a patch is automatically proposed for this
directory.
@@ -1,256 +0,0 @@
.. ## WARNING ##########################################################
.. This file is synced from openstack/openstack-manuals repository to
.. other related repositories. If you need to make changes to this file,
.. make the changes in openstack-manuals. After any change merged to,
.. openstack-manuals, automatically a patch for others will be proposed.
.. #####################################################################

=================
Community support
=================

The following resources are available to help you run and use OpenStack.
The OpenStack community constantly improves and adds to the main
features of OpenStack, but if you have any questions, do not hesitate to
ask. Use the following resources to get OpenStack support, and
troubleshoot your installations.

Documentation
~~~~~~~~~~~~~

For the available OpenStack documentation, see
`docs.openstack.org <http://docs.openstack.org>`__.

To provide feedback on documentation, join and use the
openstack-docs@lists.openstack.org mailing list at `OpenStack
Documentation Mailing
List <http://lists.openstack.org/cgi-bin/mailman/listinfo/openstack-docs>`__,
or `report a
bug <https://bugs.launchpad.net/openstack-manuals/+filebug>`__.

The following books explain how to install an OpenStack cloud and its
associated components:

* `Installation Guide for openSUSE Leap 42.1 and SUSE Linux Enterprise
  Server 12 SP1
  <http://docs.openstack.org/mitaka/install-guide-obs/>`__

* `Installation Guide for Red Hat Enterprise Linux 7 and CentOS 7
  <http://docs.openstack.org/mitaka/install-guide-rdo/>`__

* `Installation Guide for Ubuntu 14.04 (LTS)
  <http://docs.openstack.org/mitaka/install-guide-ubuntu/>`__

The following books explain how to configure and run an OpenStack cloud:

* `Architecture Design Guide <http://docs.openstack.org/arch-design/>`__

* `Administrator Guide <http://docs.openstack.org/admin-guide/>`__

* `Configuration Reference <http://docs.openstack.org/mitaka/config-reference/>`__

* `Operations Guide <http://docs.openstack.org/ops/>`__

* `Networking Guide <http://docs.openstack.org/mitaka/networking-guide>`__

* `High Availability Guide <http://docs.openstack.org/ha-guide/>`__

* `Security Guide <http://docs.openstack.org/sec/>`__

* `Virtual Machine Image Guide <http://docs.openstack.org/image-guide/>`__

The following books explain how to use the OpenStack dashboard and
command-line clients:

* `API Guide <http://developer.openstack.org/api-guide/quick-start/>`__

* `End User Guide <http://docs.openstack.org/user-guide/>`__

* `Command-Line Interface Reference
  <http://docs.openstack.org/cli-reference/>`__

The following documentation provides reference and guidance information
for the OpenStack APIs:

* `API Complete Reference
  (HTML) <http://developer.openstack.org/api-ref.html>`__

* `API Complete Reference
  (PDF) <http://developer.openstack.org/api-ref-guides/bk-api-ref.pdf>`__

The following guide provides how to contribute to OpenStack documentation:

* `Documentation Contributor Guide <http://docs.openstack.org/contributor-guide/>`__

ask.openstack.org
~~~~~~~~~~~~~~~~~

During the set up or testing of OpenStack, you might have questions
about how a specific task is completed or be in a situation where a
feature does not work correctly. Use the
`ask.openstack.org <https://ask.openstack.org>`__ site to ask questions
and get answers. When you visit the https://ask.openstack.org site, scan
the recently asked questions to see whether your question has already
been answered. If not, ask a new question. Be sure to give a clear,
concise summary in the title and provide as much detail as possible in
the description. Paste in your command output or stack traces, links to
screen shots, and any other information which might be useful.

OpenStack mailing lists
~~~~~~~~~~~~~~~~~~~~~~~

A great way to get answers and insights is to post your question or
problematic scenario to the OpenStack mailing list. You can learn from
and help others who might have similar issues. To subscribe or view the
archives, go to
http://lists.openstack.org/cgi-bin/mailman/listinfo/openstack. If you are
interested in the other mailing lists for specific projects or development,
refer to `Mailing Lists <https://wiki.openstack.org/wiki/Mailing_Lists>`__.

The OpenStack wiki
~~~~~~~~~~~~~~~~~~

The `OpenStack wiki <https://wiki.openstack.org/>`__ contains a broad
range of topics but some of the information can be difficult to find or
is a few pages deep. Fortunately, the wiki search feature enables you to
search by title or content. If you search for specific information, such
as about networking or OpenStack Compute, you can find a large amount
of relevant material. More is being added all the time, so be sure to
check back often. You can find the search box in the upper-right corner
of any OpenStack wiki page.

The Launchpad Bugs area
~~~~~~~~~~~~~~~~~~~~~~~

The OpenStack community values your set up and testing efforts and wants
your feedback. To log a bug, you must sign up for a Launchpad account at
https://launchpad.net/+login. You can view existing bugs and report bugs
in the Launchpad Bugs area. Use the search feature to determine whether
the bug has already been reported or already been fixed. If it still
seems like your bug is unreported, fill out a bug report.

Some tips:

* Give a clear, concise summary.

* Provide as much detail as possible in the description. Paste in your
  command output or stack traces, links to screen shots, and any other
  information which might be useful.

* Be sure to include the software and package versions that you are
  using, especially if you are using a development branch, such as,
  ``"Kilo release" vs git commit bc79c3ecc55929bac585d04a03475b72e06a3208``.

* Any deployment-specific information is helpful, such as whether you
  are using Ubuntu 14.04 or are performing a multi-node installation.

The following Launchpad Bugs areas are available:

* `Bugs: OpenStack Block Storage
  (cinder) <https://bugs.launchpad.net/cinder>`__

* `Bugs: OpenStack Compute (nova) <https://bugs.launchpad.net/nova>`__

* `Bugs: OpenStack Dashboard
  (horizon) <https://bugs.launchpad.net/horizon>`__

* `Bugs: OpenStack Identity
  (keystone) <https://bugs.launchpad.net/keystone>`__

* `Bugs: OpenStack Image service
  (glance) <https://bugs.launchpad.net/glance>`__

* `Bugs: OpenStack Networking
  (neutron) <https://bugs.launchpad.net/neutron>`__

* `Bugs: OpenStack Object Storage
  (swift) <https://bugs.launchpad.net/swift>`__

* `Bugs: Application catalog (murano) <https://bugs.launchpad.net/murano>`__

* `Bugs: Bare metal service (ironic) <https://bugs.launchpad.net/ironic>`__

* `Bugs: Clustering service (senlin) <https://bugs.launchpad.net/senlin>`__

* `Bugs: Containers service (magnum) <https://bugs.launchpad.net/magnum>`__

* `Bugs: Data processing service
  (sahara) <https://bugs.launchpad.net/sahara>`__

* `Bugs: Database service (trove) <https://bugs.launchpad.net/trove>`__

* `Bugs: Deployment service (fuel) <https://bugs.launchpad.net/fuel>`__

* `Bugs: DNS service (designate) <https://bugs.launchpad.net/designate>`__

* `Bugs: Key Manager Service (barbican) <https://bugs.launchpad.net/barbican>`__

* `Bugs: Monitoring (monasca) <https://bugs.launchpad.net/monasca>`__

* `Bugs: Orchestration (heat) <https://bugs.launchpad.net/heat>`__

* `Bugs: Rating (cloudkitty) <https://bugs.launchpad.net/cloudkitty>`__

* `Bugs: Shared file systems (manila) <https://bugs.launchpad.net/manila>`__

* `Bugs: Telemetry
  (ceilometer) <https://bugs.launchpad.net/ceilometer>`__

* `Bugs: Telemetry v3
  (gnocchi) <https://bugs.launchpad.net/gnocchi>`__

* `Bugs: Workflow service
  (mistral) <https://bugs.launchpad.net/mistral>`__

* `Bugs: Messaging service
  (zaqar) <https://bugs.launchpad.net/zaqar>`__

* `Bugs: OpenStack API Documentation
  (developer.openstack.org) <https://bugs.launchpad.net/openstack-api-site>`__

* `Bugs: OpenStack Documentation
  (docs.openstack.org) <https://bugs.launchpad.net/openstack-manuals>`__

The OpenStack IRC channel
~~~~~~~~~~~~~~~~~~~~~~~~~

The OpenStack community lives in the #openstack IRC channel on the
Freenode network. You can hang out, ask questions, or get immediate
feedback for urgent and pressing issues. To install an IRC client or use
a browser-based client, go to
`https://webchat.freenode.net/ <https://webchat.freenode.net>`__. You can
also use Colloquy (Mac OS X, http://colloquy.info/), mIRC (Windows,
http://www.mirc.com/), or XChat (Linux). When you are in the IRC channel
and want to share code or command output, the generally accepted method
is to use a Paste Bin. The OpenStack project has one at
http://paste.openstack.org. Just paste your longer amounts of text or
logs in the web form and you get a URL that you can paste into the
channel. The OpenStack IRC channel is ``#openstack`` on
``irc.freenode.net``. You can find a list of all OpenStack IRC channels
at https://wiki.openstack.org/wiki/IRC.

Documentation feedback
~~~~~~~~~~~~~~~~~~~~~~

To provide feedback on documentation, join and use the
openstack-docs@lists.openstack.org mailing list at `OpenStack
Documentation Mailing
List <http://lists.openstack.org/cgi-bin/mailman/listinfo/openstack-docs>`__,
or `report a
bug <https://bugs.launchpad.net/openstack-manuals/+filebug>`__.

OpenStack distribution packages
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The following Linux distributions provide community-supported packages
for OpenStack:

* **Debian:** https://wiki.debian.org/OpenStack

* **CentOS, Fedora, and Red Hat Enterprise Linux:**
  https://www.rdoproject.org/

* **openSUSE and SUSE Linux Enterprise Server:**
  https://en.opensuse.org/Portal:OpenStack

* **Ubuntu:** https://wiki.ubuntu.com/ServerTeam/CloudArchive
@@ -1,47 +0,0 @@
.. ## WARNING ##########################################################
.. This file is synced from openstack/openstack-manuals repository to
.. other related repositories. If you need to make changes to this file,
.. make the changes in openstack-manuals. After any change merged to,
.. openstack-manuals, automatically a patch for others will be proposed.
.. #####################################################################

===========
Conventions
===========

The OpenStack documentation uses several typesetting conventions.

Notices
~~~~~~~

Notices take these forms:

.. note:: A comment with additional information that explains a part of the
   text.

.. important:: Something you must be aware of before proceeding.

.. tip:: An extra but helpful piece of practical advice.

.. caution:: Helpful information that prevents the user from making mistakes.

.. warning:: Critical information about the risk of data loss or security
   issues.

Command prompts
~~~~~~~~~~~~~~~

.. code-block:: console

   $ command

Any user, including the ``root`` user, can run commands that are
prefixed with the ``$`` prompt.

.. code-block:: console

   # command

The ``root`` user must run commands that are prefixed with the ``#``
prompt. You can also prefix these commands with the :command:`sudo`
command, if available, to run them.
(Two further file diffs were suppressed because they are too large.)
@@ -1,30 +0,0 @@
[metadata]
name = openstackhaguide
summary = OpenStack High Availability Guide
author = OpenStack
author-email = openstack-docs@lists.openstack.org
home-page = http://docs.openstack.org/
classifier =
    Environment :: OpenStack
    Intended Audience :: Information Technology
    Intended Audience :: System Administrators
    License :: OSI Approved :: Apache Software License
    Operating System :: POSIX :: Linux
    Topic :: Documentation

[global]
setup-hooks =
    pbr.hooks.setup_hook

[files]

[build_sphinx]
all_files = 1
build-dir = build
source-dir = source

[wheel]
universal = 1

[pbr]
warnerrors = True
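The ``[build_sphinx]`` and ``[pbr]`` sections above are what drove the standalone doc build; with pbr installed, a build could be run straight through setuptools. A minimal sketch, assuming a checkout of the pre-retirement tree and the theme package being available:

.. code-block:: console

   $ pip install pbr sphinx openstackdocstheme
   $ python setup.py build_sphinx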
@@ -1,30 +0,0 @@
#!/usr/bin/env python
# Copyright (c) 2013 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# THIS FILE IS MANAGED BY THE GLOBAL REQUIREMENTS REPO - DO NOT EDIT
import setuptools

# In python < 2.7.4, a lazy loading of package `pbr` will break
# setuptools if some other modules registered functions in `atexit`.
# solution from: http://bugs.python.org/issue15881#msg170215
try:
    import multiprocessing  # noqa
except ImportError:
    pass

setuptools.setup(
    setup_requires=['pbr'],
    pbr=True)
@@ -1 +0,0 @@
../../common
@@ -1,12 +0,0 @@
============================================
Configure high availability on compute nodes
============================================

The `Installation Guide
<http://docs.openstack.org/liberty/#install-guides>`_
gives instructions for installing multiple compute nodes.
To make them highly available,
you must configure the environment
to include multiple instances of the API
and other services.
@@ -1,10 +0,0 @@
==================================================
Configuring the compute node for high availability
==================================================

.. toctree::
   :maxdepth: 2

   compute-node-ha-api.rst
@@ -1,289 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

import os
# import sys

import openstackdocstheme

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# sys.path.insert(0, os.path.abspath('.'))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = []

# Add any paths that contain templates here, relative to this directory.
# templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
# source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'High Availability Guide'
bug_tag = u'ha-guide'
copyright = u'2015, OpenStack contributors'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.0.1'
# The full version, including alpha/beta/rc tags.
release = '0.0.1'

# A few variables have to be set for the log-a-bug feature.
#   giturl: The location of conf.py on Git. Must be set manually.
#   gitsha: The SHA checksum of the bug description. Automatically extracted from git log.
#   bug_tag: Tag for categorizing the bug. Must be set manually.
# These variables are passed to the logabug code via html_context.
giturl = u'http://git.openstack.org/cgit/openstack/ha-guide/tree/doc/ha-guide/source'
git_cmd = "/usr/bin/git log | head -n1 | cut -f2 -d' '"
gitsha = os.popen(git_cmd).read().strip('\n')
html_context = {"gitsha": gitsha, "bug_tag": bug_tag,
                "giturl": giturl}

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
# language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all
# documents.
# default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
html_theme = 'openstackdocs'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
# html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
html_theme_path = [openstackdocstheme.get_html_theme_path()]

# The name for this set of Sphinx documents.  If None, it defaults to
# "<project> v<release> documentation".
# html_title = None

# A shorter title for the navigation bar.  Default is the same as html_title.
# html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
# html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = []

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
# html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
# So that we can enable "log-a-bug" links from each output HTML page, this
# variable must be set to a format that includes year, month, day, hours and
# minutes.
html_last_updated_fmt = '%Y-%m-%d %H:%M'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
# html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}

# If false, no module index is generated.
# html_domain_indices = True

# If false, no index is generated.
html_use_index = False

# If true, the index is split into individual pages for each letter.
# html_split_index = False

# If true, links to the reST sources are added to the pages.
html_show_sourcelink = False

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it.  The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'ha-guide'

# If true, publish source files
html_copy_source = False

# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    # 'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    ('index', 'HAGuide.tex', u'High Availability Guide',
     u'OpenStack contributors', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False

# If true, show page references after internal links.
# latex_show_pagerefs = False

# If true, show URL addresses after external links.
# latex_show_urls = False

# Documents to append as an appendix to all manuals.
# latex_appendices = []

# If false, no module index is generated.
# latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'haguide', u'High Availability Guide',
     [u'OpenStack contributors'], 1)
]

# If true, show URL addresses after external links.
# man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    ('index', 'HAGuide', u'High Availability Guide',
     u'OpenStack contributors', 'HAGuide',
     'This guide shows OpenStack operators and deployers how to configure '
     'OpenStack Networking to be robust and fault-tolerant.', 'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
# texinfo_appendices = []

# If false, no module index is generated.
# texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False

# -- Options for Internationalization output ------------------------------
locale_dirs = ['locale/']
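The log-a-bug variables above shell out to Git to pick up the SHA of the most recent commit; the ``git_cmd`` pipeline is an ordinary shell one-liner and can be checked by hand from the repository root, where it prints that SHA:

.. code-block:: console

   $ /usr/bin/git log | head -n1 | cut -f2 -d' '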
@@ -1,396 +0,0 @@
Configuration
==============

Before you launch Galera Cluster, you need to configure the server
and the database to operate as part of the cluster.

Configuring the server
~~~~~~~~~~~~~~~~~~~~~~~

Certain services running on the underlying operating system of your
OpenStack database may block Galera Cluster from normal operation
or prevent ``mysqld`` from achieving network connectivity with the cluster.

Firewall
---------

Galera Cluster requires that you open four ports to network traffic:

- On ``3306``, Galera Cluster uses TCP for database client connections
  and State Snapshot Transfer methods that require the client
  (that is, ``mysqldump``).
- On ``4567``, Galera Cluster uses TCP for replication traffic. Multicast
  replication uses both TCP and UDP on this port.
- On ``4568``, Galera Cluster uses TCP for Incremental State Transfers.
- On ``4444``, Galera Cluster uses TCP for all other State Snapshot Transfer
  methods.

.. seealso:: For more information on firewalls, see `Firewalls and default ports
   <http://docs.openstack.org/liberty/config-reference/content/firewalls-default-ports.html>`_
   in the Configuration Reference.
``iptables``
^^^^^^^^^^^^^

For many Linux distributions, you can configure the firewall using
the ``iptables`` utility. To do so, complete the following steps:

#. For each cluster node, run the following commands, replacing
   ``NODE-IP-ADDRESS`` with the IP address of the cluster node
   you want to open the firewall to:

   .. code-block:: console

      # iptables --append INPUT --in-interface eth0 \
        --protocol tcp --match tcp --dport 3306 \
        --source NODE-IP-ADDRESS --jump ACCEPT
      # iptables --append INPUT --in-interface eth0 \
        --protocol tcp --match tcp --dport 4567 \
        --source NODE-IP-ADDRESS --jump ACCEPT
      # iptables --append INPUT --in-interface eth0 \
        --protocol tcp --match tcp --dport 4568 \
        --source NODE-IP-ADDRESS --jump ACCEPT
      # iptables --append INPUT --in-interface eth0 \
        --protocol tcp --match tcp --dport 4444 \
        --source NODE-IP-ADDRESS --jump ACCEPT

   In the event that you also want to configure multicast replication,
   run this command as well:

   .. code-block:: console

      # iptables --append INPUT --in-interface eth0 \
        --protocol udp --match udp --dport 4567 \
        --source NODE-IP-ADDRESS --jump ACCEPT

#. Make the changes persistent. For servers that use ``init``, use
   the :command:`save` command:

   .. code-block:: console

      # service iptables save

   For servers that use ``systemd``, you need to save the current packet
   filtering to the path of the file that ``iptables`` reads when it starts.
   This path can vary by distribution, but common locations are in the
   ``/etc`` directory, such as:

   - ``/etc/sysconfig/iptables``
   - ``/etc/iptables/iptables.rules``

   When you find the correct path, run the :command:`iptables-save` command:

   .. code-block:: console

      # iptables-save > /etc/sysconfig/iptables

   With the firewall configuration saved, it is applied again whenever your
   OpenStack database starts.
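A quick way to confirm that the rules are in place before moving on, assuming the same four ports and the default ``filter`` table, is to list the ``INPUT`` chain:

.. code-block:: console

   # iptables -L INPUT -n | grep -E '3306|4567|4568|4444'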
``firewall-cmd``
^^^^^^^^^^^^^^^^^

For many Linux distributions, you can configure the firewall using the
``firewall-cmd`` utility for FirewallD. To do so, complete the following
steps on each cluster node:

#. Add the Galera Cluster service:

   .. code-block:: console

      # firewall-cmd --add-service=mysql

#. For each instance of OpenStack database in your cluster, run the
   following commands, replacing ``NODE-IP-ADDRESS`` with the IP address
   of the cluster node you want to open the firewall to:

   .. code-block:: console

      # firewall-cmd --add-port=3306/tcp
      # firewall-cmd --add-port=4567/tcp
      # firewall-cmd --add-port=4568/tcp
      # firewall-cmd --add-port=4444/tcp

   In the event that you also want to configure multicast replication,
   run this command as well:

   .. code-block:: console

      # firewall-cmd --add-port=4567/udp

#. To make this configuration persistent, repeat the above commands
   with the :option:`--permanent` option.

   .. code-block:: console

      # firewall-cmd --add-service=mysql --permanent
      # firewall-cmd --add-port=3306/tcp --permanent
      # firewall-cmd --add-port=4567/tcp --permanent
      # firewall-cmd --add-port=4568/tcp --permanent
      # firewall-cmd --add-port=4444/tcp --permanent
      # firewall-cmd --add-port=4567/udp --permanent

   With the firewall configuration saved, it is applied again whenever your
   OpenStack database starts.
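To confirm what FirewallD has picked up, you can list the services and ports that are open in the current zone:

.. code-block:: console

   # firewall-cmd --list-services
   # firewall-cmd --list-ports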
SELinux
--------

Security-Enhanced Linux is a kernel module for improving security on Linux
operating systems. It is commonly enabled and configured by default on
Red Hat-based distributions. In the context of Galera Cluster, systems with
SELinux may block the database service, keep it from starting, or prevent it
from establishing network connections with the cluster.

To configure SELinux to permit Galera Cluster to operate, complete
the following steps on each cluster node:

#. Using the ``semanage`` utility, open the relevant ports:

   .. code-block:: console

      # semanage port -a -t mysqld_port_t -p tcp 3306
      # semanage port -a -t mysqld_port_t -p tcp 4567
      # semanage port -a -t mysqld_port_t -p tcp 4568
      # semanage port -a -t mysqld_port_t -p tcp 4444

   In the event that you use multicast replication, you also need to
   open ``4567`` to UDP traffic:

   .. code-block:: console

      # semanage port -a -t mysqld_port_t -p udp 4567

#. Set SELinux to allow the database server to run:

   .. code-block:: console

      # semanage permissive -a mysqld_t

With these options set, SELinux now permits Galera Cluster to operate.

.. note:: Bear in mind, leaving SELinux in permissive mode is not a good
   security practice. Over the longer term, you need to develop a
   security policy for Galera Cluster and then switch SELinux back
   into enforcing mode.

   For more information on configuring SELinux to work with
   Galera Cluster, see the `Documentation
   <http://galeracluster.com/documentation-webpages/selinux.html>`_.
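To double-check the resulting port labels and the current enforcement mode, the following read-only commands are useful:

.. code-block:: console

   # semanage port -l | grep mysqld_port_t
   # getenforce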
AppArmor
---------

Application Armor is a kernel module for improving security on Linux
operating systems. It is developed by Canonical and commonly used on
Ubuntu-based distributions. In the context of Galera Cluster, systems
with AppArmor may block the database service from operating normally.

To configure AppArmor to work with Galera Cluster, complete the
following steps on each cluster node:

#. Create a symbolic link for the database server profile in the
   ``disable`` directory:

   .. code-block:: console

      # ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/

#. Restart AppArmor. For servers that use ``init``, run the following command:

   .. code-block:: console

      # service apparmor restart

   For servers that use ``systemd``, instead run this command:

   .. code-block:: console

      # systemctl restart apparmor

AppArmor now permits Galera Cluster to operate.
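You can confirm that the profile is no longer enforced by checking AppArmor's status output; after the restart, the ``mysqld`` profile should no longer appear in the enforced list:

.. code-block:: console

   # aa-status | grep mysqld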
Database configuration
~~~~~~~~~~~~~~~~~~~~~~~

MySQL databases, including MariaDB and Percona XtraDB, manage their
configurations using a ``my.cnf`` file, which is typically located in the
``/etc`` directory. Configuration options available in these databases are
also available in Galera Cluster, with some restrictions and several
additions.

.. code-block:: ini

   [mysqld]
   datadir=/var/lib/mysql
   socket=/var/lib/mysql/mysql.sock
   user=mysql
   binlog_format=ROW
   bind-address=0.0.0.0

   # InnoDB Configuration
   default_storage_engine=innodb
   innodb_autoinc_lock_mode=2
   innodb_flush_log_at_trx_commit=0
   innodb_buffer_pool_size=122M

   # Galera Cluster Configuration
   wsrep_provider=/usr/lib/libgalera_smm.so
   wsrep_provider_options="pc.recovery=TRUE;gcache.size=300M"
   wsrep_cluster_name="my_example_cluster"
   wsrep_cluster_address="gcomm://GALERA1-IP,GALERA2-IP,GALERA3-IP"
   wsrep_sst_method=rsync
Configuring ``mysqld``
-----------------------

While all of the configuration parameters available to the standard MySQL,
MariaDB, or Percona XtraDB database server are available in Galera Cluster,
there are some that you must define at the outset to avoid conflicts or
unexpected behavior.

- Ensure that the database server is not bound only to the localhost,
  ``127.0.0.1``. Instead, bind it to ``0.0.0.0`` to ensure it listens on
  all available interfaces.

  .. code-block:: ini

     bind-address=0.0.0.0

- Ensure that the binary log format is set to use row-level replication,
  as opposed to statement-level replication:

  .. code-block:: ini

     binlog_format=ROW
Configuring InnoDB
-------------------

Galera Cluster does not support non-transactional storage engines and
requires that you use InnoDB by default. There are some additional
parameters that you must define to avoid conflicts.

- Ensure that the default storage engine is set to InnoDB:

  .. code-block:: ini

     default_storage_engine=InnoDB

- Ensure that the InnoDB locking mode for generating auto-increment values
  is set to ``2``, which is the interleaved locking mode.

  .. code-block:: ini

     innodb_autoinc_lock_mode=2

  Do not change this value. Other modes may cause ``INSERT`` statements
  on tables with auto-increment columns to fail, as well as unresolved
  deadlocks that leave the system unresponsive.

- Ensure that the InnoDB log buffer is written to file once per second,
  rather than on each commit, to improve performance:

  .. code-block:: ini

     innodb_flush_log_at_trx_commit=0

  Bear in mind, while setting this parameter to ``0`` or ``2`` can improve
  performance, it introduces certain dangers. Operating system failures can
  erase the last second of transactions. While you can recover this data
  from another node, if the cluster goes down at the same time
  (in the event of a data center power outage), you lose this data permanently.

- Define the InnoDB memory buffer pool size. The default value is 128 MB,
  but to compensate for Galera Cluster's additional memory usage, scale
  your usual value back by 5%:

  .. code-block:: ini

     innodb_buffer_pool_size=122M
Configuring wsrep replication
------------------------------

Galera Cluster configuration parameters all have the ``wsrep_`` prefix.
There are five that you must define for each cluster node in your
OpenStack database.

- **wsrep Provider**: The Galera Replication Plugin serves as the wsrep
  provider for Galera Cluster. It is installed on your system as the
  ``libgalera_smm.so`` file. You must define the path to this file in
  your ``my.cnf``:

  .. code-block:: ini

     wsrep_provider="/usr/lib/libgalera_smm.so"

- **Cluster Name**: Define an arbitrary name for your cluster.

  .. code-block:: ini

     wsrep_cluster_name="my_example_cluster"

  You must use the same name on every cluster node. The connection fails
  when this value does not match.

- **Cluster Address**: List the IP addresses for each cluster node.

  .. code-block:: ini

     wsrep_cluster_address="gcomm://192.168.1.1,192.168.1.2,192.168.1.3"

  Replace the IP addresses given here with a comma-separated list of each
  OpenStack database in your cluster.

- **Node Name**: Define the logical name of the cluster node.

  .. code-block:: ini

     wsrep_node_name="Galera1"

- **Node Address**: Define the IP address of the cluster node.

  .. code-block:: ini

     wsrep_node_address="192.168.1.1"
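Once these five parameters are in place on every node and the cluster is running, a quick sanity check from any node is to ask the server for its cluster size and ready state; only standard ``wsrep_`` status variables are used here:

.. code-block:: console

   $ mysql -u root -p -e "SHOW GLOBAL STATUS LIKE 'wsrep_cluster_size'"
   $ mysql -u root -p -e "SHOW GLOBAL STATUS LIKE 'wsrep_ready'"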
Additional parameters
^^^^^^^^^^^^^^^^^^^^^^

For a complete list of the available parameters, run the
``SHOW VARIABLES`` command from within the database client:

.. code-block:: mysql

   SHOW VARIABLES LIKE 'wsrep_%';

   +------------------------------+-------+
   | Variable_name                | Value |
   +------------------------------+-------+
   | wsrep_auto_increment_control | ON    |
   | wsrep_causal_reads           | OFF   |
   | wsrep_certify_nonPK          | ON    |
   | ...                          | ...   |
   | wsrep_sync_wait              | 0     |
   +------------------------------+-------+

For the documentation of these parameters, the wsrep provider options, and
the status variables available in Galera Cluster, see `Reference
<http://galeracluster.com/documentation-webpages/reference.html>`_.
@@ -1,275 +0,0 @@
Installation
=============

Using Galera Cluster requires that you install two packages. The first is
the database server, which must include the wsrep API patch. The second
package is the Galera Replication Plugin, which enables the write-set
replication service functionality with the database server.

There are three implementations of Galera Cluster: MySQL, MariaDB, and
Percona XtraDB. For each implementation, there is a software repository that
provides binary packages for Debian, Red Hat, and SUSE-based Linux
distributions.

Enabling the repository
~~~~~~~~~~~~~~~~~~~~~~~~

Galera Cluster is not available in the base repositories of Linux
distributions. In order to install it with your package manager, you must
first enable the repository on your system. The particular methods for
doing so vary depending on which distribution you use for OpenStack and
which database server you want to use.
Debian
-------

For Debian and Debian-based distributions, such as Ubuntu, complete the
following steps:

#. Add the GnuPG key for the database repository that you want to use.

   .. code-block:: console

      # apt-key adv --recv-keys --keyserver \
        keyserver.ubuntu.com BC19DDBA

   Note that the particular key value in this command varies depending on
   which database software repository you want to use.

   +--------------------------+------------------------+
   | Database                 | Key                    |
   +==========================+========================+
   | Galera Cluster for MySQL | ``BC19DDBA``           |
   +--------------------------+------------------------+
   | MariaDB Galera Cluster   | ``0xcbcb082a1bb943db`` |
   +--------------------------+------------------------+
   | Percona XtraDB Cluster   | ``1C4CBDCDCD2EFD2A``   |
   +--------------------------+------------------------+

#. Add the repository to your sources list. Using your preferred text
   editor, create a ``galera.list`` file in the ``/etc/apt/sources.list.d/``
   directory. For the contents of this file, use the lines that pertain to
   the software repository you want to install:

   .. code-block:: linux-config

      # Galera Cluster for MySQL
      deb http://releases.galeracluster.com/DISTRO RELEASE main

      # MariaDB Galera Cluster
      deb http://mirror.jmu.edu/pub/mariadb/repo/VERSION/DISTRO RELEASE main

      # Percona XtraDB Cluster
      deb http://repo.percona.com/apt RELEASE main

   For each entry: Replace all instances of ``DISTRO`` with the distribution
   that you use, such as ``debian`` or ``ubuntu``. Replace all instances of
   ``RELEASE`` with the release of that distribution, such as ``wheezy`` or
   ``trusty``. Replace all instances of ``VERSION`` with the version of the
   database server that you want to install, such as ``5.6`` or ``10.0``.

   .. note:: In the event that you do not know the release code-name for
      your distribution, you can use the following command to
      find it out:

      .. code-block:: console

         $ lsb_release -a

#. Update the local cache.

   .. code-block:: console

      # apt-get update

Packages in the Galera Cluster Debian repository are now available for
installation on your system.
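Before moving on to the installation step, it can be worth confirming that apt now resolves the server package from the new repository; the package name used below (``mariadb-galera-server``, for the MariaDB case) is only an example and varies by implementation and version:

.. code-block:: console

   $ apt-cache policy mariadb-galera-server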
||||||
Red Hat
--------

For Red Hat Enterprise Linux and Red Hat-based Linux distributions, the
process is more straightforward: you create a repository definition file
that contains only the text for the repository you want to use.

- For Galera Cluster for MySQL, using your preferred text editor, create a
  ``Galera.repo`` file in the ``/etc/yum.repos.d/`` directory.

  .. code-block:: linux-config

     [galera]
     name = Galera Cluster for MySQL
     baseurl = http://releases.galeracluster.com/DISTRO/RELEASE/ARCH
     gpgkey = http://releases.galeracluster.com/GPG-KEY-galeracluster.com
     gpgcheck = 1

  Replace ``DISTRO`` with the name of the distribution you use, such as
  ``centos`` or ``fedora``. Replace ``RELEASE`` with the release number,
  such as ``7`` for CentOS 7. Replace ``ARCH`` with your system
  architecture, such as ``x86_64``.

- For MariaDB Galera Cluster, using your preferred text editor, create a
  ``Galera.repo`` file in the ``/etc/yum.repos.d/`` directory.

  .. code-block:: linux-config

     [mariadb]
     name = MariaDB Galera Cluster
     baseurl = http://yum.mariadb.org/VERSION/PACKAGE
     gpgkey = https://yum.mariadb.org/RPM-GPG-KEY-MariaDB
     gpgcheck = 1

  Replace ``VERSION`` with the version of MariaDB you want to install, such
  as ``5.6`` or ``10.0``. Replace ``PACKAGE`` with the package type and
  architecture, such as ``rhel6-amd64`` for Red Hat 6 on 64-bit
  architecture.

- For Percona XtraDB Cluster, run the following command:

  .. code-block:: console

     # yum install http://www.percona.com/downloads/percona-release/redhat/0.1-3/percona-release-0.1-3.noarch.rpm

  Bear in mind that the Percona repository only supports Red Hat Enterprise
  Linux and CentOS distributions.

Packages in the Galera Cluster Red Hat repository are now available for
installation on your system.
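As an illustration, on a CentOS 7 controller with 64-bit architecture the
``baseurl`` for Galera Cluster for MySQL would read
``http://releases.galeracluster.com/centos/7/x86_64``. You can then confirm
that the repository is visible to the package manager; the repository id
shown depends on the file you created:

.. code-block:: console

   # yum repolist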
SUSE
-----

For SUSE Enterprise Linux and SUSE-based distributions, such as openSUSE,
binary installations are only available for Galera Cluster for MySQL and
MariaDB Galera Cluster.

#. Create a ``Galera.repo`` file in the local directory. For Galera Cluster
   for MySQL, use the following content:

   .. code-block:: linux-config

      [galera]
      name = Galera Cluster for MySQL
      baseurl = http://releases.galeracluster.com/DISTRO/RELEASE
      gpgkey = http://releases.galeracluster.com/GPG-KEY-galeracluster.com
      gpgcheck = 1

   In the text: Replace ``DISTRO`` with the name of the distribution you
   use, such as ``sles`` or ``opensuse``. Replace ``RELEASE`` with the
   version number of that distribution.

   For MariaDB Galera Cluster, instead use this content:

   .. code-block:: linux-config

      [mariadb]
      name = MariaDB Galera Cluster
      baseurl = http://yum.mariadb.org/VERSION/PACKAGE
      gpgkey = https://yum.mariadb.org/RPM-GPG-KEY-MariaDB
      gpgcheck = 1

   In the text: Replace ``VERSION`` with the version of MariaDB you want to
   install, such as ``5.6`` or ``10.0``. Replace ``PACKAGE`` with the
   package architecture you want to use, such as ``opensuse13-amd64``.

#. Add the repository to your system:

   .. code-block:: console

      $ sudo zypper addrepo Galera.repo

#. Refresh ``zypper``:

   .. code-block:: console

      $ sudo zypper refresh

Packages in the Galera Cluster SUSE repository are now available for
installation.
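If you want to confirm that the new repository is active before installing
anything, you can list the repositories known to ``zypper``; the alias shown
for the new entry depends on how ``zypper addrepo`` named it:

.. code-block:: console

   $ zypper repos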
Installing Galera Cluster
~~~~~~~~~~~~~~~~~~~~~~~~~~

When you finish enabling the software repository for Galera Cluster, you can
install it using your package manager. The particular command and packages
you need to install vary depending on which database server you want to
install and which Linux distribution you use:

Galera Cluster for MySQL:

- For Debian and Debian-based distributions, such as Ubuntu, run the
  following command:

  .. code-block:: console

     # apt-get install galera-3 mysql-wsrep-5.6

- For Red Hat Enterprise Linux and Red Hat-based distributions, such as
  Fedora or CentOS, instead run this command:

  .. code-block:: console

     # yum install galera-3 mysql-wsrep-5.6

- For SUSE Enterprise Linux Server and SUSE-based distributions, such as
  openSUSE, instead run this command:

  .. code-block:: console

     # zypper install galera-3 mysql-wsrep-5.6

MariaDB Galera Cluster:

- For Debian and Debian-based distributions, such as Ubuntu, run the
  following command:

  .. code-block:: console

     # apt-get install galera mariadb-galera-server

- For Red Hat Enterprise Linux and Red Hat-based distributions, such as
  Fedora or CentOS, instead run this command:

  .. code-block:: console

     # yum install galera MariaDB-Galera-server

- For SUSE Enterprise Linux Server and SUSE-based distributions, such as
  openSUSE, instead run this command:

  .. code-block:: console

     # zypper install galera MariaDB-Galera-server

Percona XtraDB Cluster:

- For Debian and Debian-based distributions, such as Ubuntu, run the
  following command:

  .. code-block:: console

     # apt-get install percona-xtradb-cluster

- For Red Hat Enterprise Linux and Red Hat-based distributions, such as
  Fedora or CentOS, instead run this command:

  .. code-block:: console

     # yum install Percona-XtraDB-Cluster

Galera Cluster is now installed on your system. You must repeat this
process for each controller node in your cluster.

.. warning:: In the event that you already installed the standalone version
   of MySQL, MariaDB or Percona XtraDB, this installation purges all
   privileges on your OpenStack database server. You must reapply the
   privileges listed in the installation guide.
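Before moving on, it can be useful to confirm on each controller that the
expected packages landed and that the Galera provider library referenced
later by ``wsrep_provider`` is present. The library path shown is a common
default and may differ on your distribution:

.. code-block:: console

   # dpkg -l | grep -i -e galera -e wsrep    # Debian and Ubuntu
   # rpm -qa | grep -i -e galera -e wsrep    # Red Hat and SUSE families
   # ls /usr/lib/galera/libgalera_smm.so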
@ -1,256 +0,0 @@
Management
===========

When you finish the installation and configuration process on each
cluster node in your OpenStack database, you can initialize Galera Cluster.

Before you attempt this, verify that you have the following ready:

- Database hosts with Galera Cluster installed. You need a
  minimum of three hosts;
- No firewalls between the hosts;
- SELinux and AppArmor set to permit access to ``mysqld``;
- The correct path to ``libgalera_smm.so`` given to the
  ``wsrep_provider`` parameter.
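The last two points are usually satisfied through the wsrep settings in the
MySQL or MariaDB configuration. The following is only a minimal sketch; the
file location, the provider path, and the IP addresses are assumptions that
you must adapt to your distribution and topology:

.. code-block:: ini

   # e.g. /etc/mysql/conf.d/galera.cnf (location varies by distribution)
   [mysqld]
   wsrep_provider = /usr/lib/galera/libgalera_smm.so
   wsrep_cluster_name = "openstack_db_cluster"
   wsrep_cluster_address = "gcomm://10.0.0.12,10.0.0.13,10.0.0.14"
   binlog_format = ROW
   default_storage_engine = InnoDB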
Initializing the cluster
~~~~~~~~~~~~~~~~~~~~~~~~~

In Galera Cluster, the Primary Component is the cluster of database
servers that replicate into each other. In the event that a
cluster node loses connectivity with the Primary Component, it
defaults into a non-operational state, to avoid creating or serving
inconsistent data.

By default, cluster nodes do not start as part of a Primary
Component. Instead they assume that one exists somewhere and
attempt to establish a connection with it. To create a Primary
Component, you must start one cluster node using the
``--wsrep-new-cluster`` option. You can do this using any cluster
node; it is not important which you choose. In the Primary
Component, replication and state transfers bring all databases to
the same state.
To start the cluster, complete the following steps:

#. Initialize the Primary Component on one cluster node. For
   servers that use ``init``, run the following command:

   .. code-block:: console

      # service mysql start --wsrep-new-cluster

   For servers that use ``systemd``, instead run this command:

   .. code-block:: console

      # systemctl start mysql --wsrep-new-cluster

#. Once the database server starts, check the cluster status using
   the ``wsrep_cluster_size`` status variable. From the database
   client, run the following command:

   .. code-block:: mysql

      SHOW STATUS LIKE 'wsrep_cluster_size';

      +--------------------+-------+
      | Variable_name      | Value |
      +--------------------+-------+
      | wsrep_cluster_size | 1     |
      +--------------------+-------+

#. Start the database server on all other cluster nodes. For
   servers that use ``init``, run the following command:

   .. code-block:: console

      # service mysql start

   For servers that use ``systemd``, instead run this command:

   .. code-block:: console

      # systemctl start mysql

#. When you have all cluster nodes started, log into the database
   client on one of them and check the ``wsrep_cluster_size``
   status variable again.

   .. code-block:: mysql

      SHOW STATUS LIKE 'wsrep_cluster_size';

      +--------------------+-------+
      | Variable_name      | Value |
      +--------------------+-------+
      | wsrep_cluster_size | 3     |
      +--------------------+-------+
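Beyond the cluster size, a few other wsrep status variables give a quick
sanity check that a node has actually joined and synced; the expected values
noted in the comments are the usual healthy ones:

.. code-block:: mysql

   SHOW STATUS LIKE 'wsrep_ready';                 -- expect ON
   SHOW STATUS LIKE 'wsrep_cluster_status';        -- expect Primary
   SHOW STATUS LIKE 'wsrep_local_state_comment';   -- expect Synced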
When each cluster node starts, it checks the IP addresses given to
the ``wsrep_cluster_address`` parameter and attempts to establish
network connectivity with a database server running there. Once it
establishes a connection, it attempts to join the Primary
Component, requesting a state transfer as needed to bring itself
into sync with the cluster.

In the event that you need to restart any cluster node, you can do
so. When the database server comes back up, it establishes
connectivity with the Primary Component and updates itself to any
changes it may have missed while down.
Restarting the cluster
-----------------------

Individual cluster nodes can stop and be restarted without issue.
When a database loses its connection or restarts, Galera Cluster
brings it back into sync once it reestablishes connection with the
Primary Component. In the event that you need to restart the
entire cluster, identify the most advanced cluster node and
initialize the Primary Component on that node.

To find the most advanced cluster node, you need to check the
sequence numbers, or seqnos, on the last committed transaction for
each. You can find this by viewing the ``grastate.dat`` file in the
database directory:

.. code-block:: console

   $ cat /path/to/datadir/grastate.dat

   # Galera saved state
   version: 3.8
   uuid: 5ee99582-bb8d-11e2-b8e3-23de375c1d30
   seqno: 8204503945773

Alternatively, if the database server is running, use the
``wsrep_last_committed`` status variable:

.. code-block:: mysql

   SHOW STATUS LIKE 'wsrep_last_committed';

   +----------------------+--------+
   | Variable_name        | Value  |
   +----------------------+--------+
   | wsrep_last_committed | 409745 |
   +----------------------+--------+

This value increments with each transaction, so the most advanced
node has the highest sequence number, and therefore is the most up to date.
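Putting this together, a full-cluster restart is a short sequence: compare
the seqnos, bootstrap the node with the highest one, and then start the
remaining nodes normally. The data directory path below is the same
placeholder used above:

.. code-block:: console

   # On each node, note the seqno
   $ grep seqno /path/to/datadir/grastate.dat

   # On the node with the highest seqno, re-create the Primary Component
   # service mysql start --wsrep-new-cluster

   # On the remaining nodes
   # service mysql start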
Configuration tips
~~~~~~~~~~~~~~~~~~~

Deployment strategies
----------------------

Galera can be configured using one of the following
strategies:

- Each instance has its own IP address;

  OpenStack services are configured with the list of these IP
  addresses so they can select one of the addresses from those
  available.

- Galera runs behind HAProxy.

  HAProxy load balances incoming requests and exposes just one IP
  address for all the clients.

  Galera synchronous replication guarantees a zero slave lag. The
  failover procedure completes once HAProxy detects that the active
  back end has gone down and switches to the backup one, which is
  then marked as 'UP'. If no back ends are up (in other words, the
  Galera cluster is not ready to accept connections), the failover
  procedure finishes only when the Galera cluster has been
  successfully reassembled. The SLA is normally no more than 5
  minutes.

- Use MySQL/Galera in active/passive mode to avoid deadlocks on
  ``SELECT ... FOR UPDATE`` type queries (used, for example, by nova
  and neutron). This issue is discussed more in the following:

  - http://lists.openstack.org/pipermail/openstack-dev/2014-May/035264.html
  - http://www.joinfu.com/

Of these options, the second one is highly recommended. Although Galera
supports active/active configurations, we recommend active/passive
(enforced by the load balancer) in order to avoid lock contention.
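In the recommended HAProxy deployment, each OpenStack service simply points
its database connection at the single virtual IP. As an illustrative sketch
only, where the VIP, credentials, and database name are placeholders and the
option section is the standard ``[database]`` group:

.. code-block:: ini

   [database]
   # 10.0.0.11 is the HAProxy virtual IP used in the examples in this guide
   connection = mysql+pymysql://nova:NOVA_DBPASS@10.0.0.11/nova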
Configuring HAProxy
--------------------

If you use HAProxy for load-balancing client access to Galera
Cluster as described in :doc:`controller-ha-haproxy`, you can
use the ``clustercheck`` utility to improve health checks.

#. Create a configuration file for ``clustercheck`` at
   ``/etc/sysconfig/clustercheck``:

   .. code-block:: ini

      MYSQL_USERNAME="clustercheck_user"
      MYSQL_PASSWORD="my_clustercheck_password"
      MYSQL_HOST="localhost"
      MYSQL_PORT="3306"

#. Log in to the database client and grant the ``clustercheck`` user
   ``PROCESS`` privileges.

   .. code-block:: mysql

      GRANT PROCESS ON *.* TO 'clustercheck_user'@'localhost'
      IDENTIFIED BY 'my_clustercheck_password';

      FLUSH PRIVILEGES;

   You only need to do this on one cluster node. Galera Cluster
   replicates the user to all the others.

#. Create a configuration file for the HAProxy monitor service, at
   ``/etc/xinetd.d/galera-monitor``:

   .. code-block:: ini

      service galera-monitor
      {
         port = 9200
         disable = no
         socket_type = stream
         protocol = tcp
         wait = no
         user = root
         group = root
         groups = yes
         server = /usr/bin/clustercheck
         type = UNLISTED
         per_source = UNLIMITED
         log_on_success =
         log_on_failure = HOST
         flags = REUSE
      }

#. Start the ``xinetd`` daemon for ``clustercheck``. For servers
   that use ``init``, run the following commands:

   .. code-block:: console

      # service xinetd enable
      # service xinetd start

   For servers that use ``systemd``, instead run these commands:

   .. code-block:: console

      # systemctl daemon-reload
      # systemctl enable xinetd
      # systemctl start xinetd
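Once ``xinetd`` is running, you can probe the health endpoint that HAProxy
polls. A synced Galera node answers on port 9200 with an HTTP 200 response;
the exact body text depends on your ``clustercheck`` version, so the output
below is only indicative:

.. code-block:: console

   # curl http://10.0.0.12:9200
   Galera cluster node is synced.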
@ -1,33 +0,0 @@
Database (Galera Cluster)
==========================

The first step is to install the database that sits at the heart of the
cluster. To implement high availability, run an instance of the database on
each controller node and use Galera Cluster to provide replication between
them. Galera Cluster is a synchronous multi-master database cluster, based
on MySQL and the InnoDB storage engine. It is a high-availability service
that provides high system uptime, no data loss, and scalability for growth.

You can achieve high availability for the OpenStack database in many
different ways, depending on the type of database that you want to use.
There are three implementations of Galera Cluster available to you:

- `Galera Cluster for MySQL <http://galeracluster.com/>`_ The MySQL
  reference implementation from Codership, Oy;
- `MariaDB Galera Cluster <https://mariadb.org/>`_ The MariaDB
  implementation of Galera Cluster, which is commonly supported in
  environments based on Red Hat distributions;
- `Percona XtraDB Cluster <http://www.percona.com/>`_ The XtraDB
  implementation of Galera Cluster from Percona.

In addition to Galera Cluster, you can also achieve high availability
through other database options, such as PostgreSQL, which has its own
replication system.

.. toctree::
   :maxdepth: 2

   controller-ha-galera-install
   controller-ha-galera-config
   controller-ha-galera-manage
@ -1,229 +0,0 @@
=======
HAProxy
=======

HAProxy provides a fast and reliable HTTP reverse proxy and load balancer
for TCP or HTTP applications. It is particularly suited for web sites
crawling under very high loads while needing persistence or Layer 7
processing. It realistically supports tens of thousands of connections
with recent hardware.

Each instance of HAProxy configures its front end to accept connections
only from the virtual IP (VIP) address and to terminate them as a list
of all instances of the corresponding service under load balancing,
such as any OpenStack API service.

This makes the instances of HAProxy act independently and fail over
transparently together with the network endpoints (VIP addresses)
failover and, therefore, shares the same SLA.

You can alternatively use a commercial load balancer, which is hardware
or software based. A hardware load balancer generally has good performance.

For detailed instructions about installing HAProxy on your nodes,
see its `official documentation <http://www.haproxy.org/#docs>`_.

.. note::

   HAProxy should not be a single point of failure.
   It is advisable to have multiple HAProxy instances running,
   where the number of these instances is a small odd number like 3 or 5.
   You need to ensure its availability by other means,
   such as Keepalived or Pacemaker.

The common practice is to locate an HAProxy instance on each OpenStack
controller in the environment.

Once configured (see example file below), add HAProxy to the cluster
and ensure the VIPs can only run on machines where HAProxy is active:
``pcs``

.. code-block:: console

   $ pcs resource create lb-haproxy systemd:haproxy --clone
   $ pcs constraint order start p_api-ip then lb-haproxy-clone kind=Optional
   $ pcs constraint colocation add p_api-ip with lb-haproxy-clone

``crmsh``

TBA
Example Config File
~~~~~~~~~~~~~~~~~~~

Here is an example ``/etc/haproxy/haproxy.cfg`` configuration file.
You need a copy of it on each controller node.

.. note::

   To implement any changes made to this file, you must restart the
   HAProxy service.
.. code-block:: none

   global
     chroot /var/lib/haproxy
     daemon
     group haproxy
     maxconn 4000
     pidfile /var/run/haproxy.pid
     user haproxy

   defaults
     log global
     maxconn 4000
     option redispatch
     retries 3
     timeout http-request 10s
     timeout queue 1m
     timeout connect 10s
     timeout client 1m
     timeout server 1m
     timeout check 10s

   listen dashboard_cluster
     bind <Virtual IP>:443
     balance source
     option tcpka
     option httpchk
     option tcplog
     server controller1 10.0.0.12:443 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:443 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:443 check inter 2000 rise 2 fall 5

   listen galera_cluster
     bind <Virtual IP>:3306
     balance source
     option mysql-check
     server controller1 10.0.0.12:3306 check port 9200 inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:3306 backup check port 9200 inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:3306 backup check port 9200 inter 2000 rise 2 fall 5

   listen glance_api_cluster
     bind <Virtual IP>:9292
     balance source
     option tcpka
     option httpchk
     option tcplog
     server controller1 10.0.0.12:9292 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:9292 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:9292 check inter 2000 rise 2 fall 5

   listen glance_registry_cluster
     bind <Virtual IP>:9191
     balance source
     option tcpka
     option tcplog
     server controller1 10.0.0.12:9191 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:9191 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:9191 check inter 2000 rise 2 fall 5

   listen keystone_admin_cluster
     bind <Virtual IP>:35357
     balance source
     option tcpka
     option httpchk
     option tcplog
     server controller1 10.0.0.12:35357 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:35357 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:35357 check inter 2000 rise 2 fall 5

   listen keystone_public_internal_cluster
     bind <Virtual IP>:5000
     balance source
     option tcpka
     option httpchk
     option tcplog
     server controller1 10.0.0.12:5000 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:5000 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:5000 check inter 2000 rise 2 fall 5

   listen nova_ec2_api_cluster
     bind <Virtual IP>:8773
     balance source
     option tcpka
     option tcplog
     server controller1 10.0.0.12:8773 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:8773 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:8773 check inter 2000 rise 2 fall 5

   listen nova_compute_api_cluster
     bind <Virtual IP>:8774
     balance source
     option tcpka
     option httpchk
     option tcplog
     server controller1 10.0.0.12:8774 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:8774 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:8774 check inter 2000 rise 2 fall 5

   listen nova_metadata_api_cluster
     bind <Virtual IP>:8775
     balance source
     option tcpka
     option tcplog
     server controller1 10.0.0.12:8775 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:8775 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:8775 check inter 2000 rise 2 fall 5

   listen cinder_api_cluster
     bind <Virtual IP>:8776
     balance source
     option tcpka
     option httpchk
     option tcplog
     server controller1 10.0.0.12:8776 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:8776 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:8776 check inter 2000 rise 2 fall 5

   listen ceilometer_api_cluster
     bind <Virtual IP>:8777
     balance source
     option tcpka
     option tcplog
     server controller1 10.0.0.12:8777 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:8777 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:8777 check inter 2000 rise 2 fall 5

   listen nova_vncproxy_cluster
     bind <Virtual IP>:6080
     balance source
     option tcpka
     option tcplog
     server controller1 10.0.0.12:6080 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:6080 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:6080 check inter 2000 rise 2 fall 5

   listen neutron_api_cluster
     bind <Virtual IP>:9696
     balance source
     option tcpka
     option httpchk
     option tcplog
     server controller1 10.0.0.12:9696 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:9696 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:9696 check inter 2000 rise 2 fall 5

   listen swift_proxy_cluster
     bind <Virtual IP>:8080
     balance source
     option tcplog
     option tcpka
     server controller1 10.0.0.12:8080 check inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:8080 check inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:8080 check inter 2000 rise 2 fall 5
.. note::

   The Galera cluster configuration directive ``backup`` indicates
   that two of the three controllers are standby nodes.
   This ensures that only one node services write requests
   because OpenStack support for multi-node writes is not yet production-ready.

.. note::

   The Telemetry API service configuration does not have the ``option httpchk``
   directive as it cannot process this check properly.
   TODO: explain why the Telemetry API is so special

[TODO: we need more commentary about the contents and format of this file]
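After editing the file on a controller, it is worth validating the syntax
before restarting the service; ``haproxy`` can check a configuration file
without loading it:

.. code-block:: console

   # haproxy -c -f /etc/haproxy/haproxy.cfg
   Configuration file is valid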
@ -1,147 +0,0 @@
============================
Identity services (keystone)
============================

OpenStack Identity (keystone)
is the Identity service in OpenStack that is used by many services.
You should be familiar with
`OpenStack identity concepts
<http://docs.openstack.org/liberty/install-guide-ubuntu/common/get_started_identity.html>`_
before proceeding.

Making the OpenStack Identity service highly available
in active / passive mode involves:

- :ref:`keystone-pacemaker`
- :ref:`keystone-config-identity`
- :ref:`keystone-services-config`

.. _keystone-pacemaker:

Add OpenStack Identity resource to Pacemaker
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#. You must first download the OpenStack Identity resource to Pacemaker
   by running the following commands:

   .. code-block:: console

      # cd /usr/lib/ocf/resource.d
      # mkdir openstack
      # cd openstack
      # wget https://git.openstack.org/cgit/openstack/openstack-resource-agents/plain/ocf/keystone
      # chmod a+rx *

#. You can now add the Pacemaker configuration
   for the OpenStack Identity resource
   by running the :command:`crm configure` command
   to connect to the Pacemaker cluster.
   Add the following cluster resources:

   ::

      primitive p_keystone ocf:openstack:keystone \
        params config="/etc/keystone/keystone.conf" \
        os_password="secretsecret" \
        os_username="admin" \
        os_tenant_name="admin" \
        os_auth_url="http://10.0.0.11:5000/v2.0/" \
        op monitor interval="30s" timeout="30s"

   This configuration creates ``p_keystone``,
   a resource for managing the OpenStack Identity service.

   :command:`crm configure` supports batch input
   so you may copy and paste the above lines
   into your live Pacemaker configuration,
   and then make changes as required.
   For example, you may enter ``edit p_ip_keystone``
   from the :command:`crm configure` menu
   and edit the resource to match your preferred virtual IP address.

#. After you add these resources,
   commit your configuration changes by entering :command:`commit`
   from the :command:`crm configure` menu.
   Pacemaker then starts the OpenStack Identity service
   and its dependent resources on one of your nodes.
.. _keystone-config-identity:

Configure OpenStack Identity service
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#. Edit the :file:`keystone.conf` file
   to change the values of the :manpage:`bind(2)` parameters:

   .. code-block:: ini

      bind_host = 10.0.0.11
      public_bind_host = 10.0.0.11
      admin_bind_host = 10.0.0.11

   The ``admin_bind_host`` parameter
   lets you use a private network for admin access.

#. To be sure that all data is highly available,
   ensure that everything is stored in the MySQL database
   (which is also highly available):

   .. code-block:: ini

      [catalog]
      driver = keystone.catalog.backends.sql.Catalog
      ...
      [identity]
      driver = keystone.identity.backends.sql.Identity
      ...
.. _keystone-services-config:

Configure OpenStack services to use the highly available OpenStack Identity
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Your OpenStack services must now point
their OpenStack Identity configuration
to the highly available virtual cluster IP address
rather than point to the physical IP address
of an OpenStack Identity server as you would do
in a non-HA environment.

#. For OpenStack Compute, for example,
   if your OpenStack Identity service IP address is 10.0.0.11,
   use the following configuration in your :file:`api-paste.ini` file:

   .. code-block:: ini

      auth_host = 10.0.0.11

#. You also need to create the OpenStack Identity Endpoint
   with this IP address.

   .. note::

      If you are using both private and public IP addresses,
      you should create two virtual IP addresses
      and define your endpoint like this:

      .. code-block:: console

         $ openstack endpoint create --region $KEYSTONE_REGION \
           $service-type public http://PUBLIC_VIP:5000/v2.0
         $ openstack endpoint create --region $KEYSTONE_REGION \
           $service-type admin http://10.0.0.11:35357/v2.0
         $ openstack endpoint create --region $KEYSTONE_REGION \
           $service-type internal http://10.0.0.11:5000/v2.0

#. If you are using the horizon dashboard,
   edit the :file:`local_settings.py` file
   to include the following:

   .. code-block:: ini

      OPENSTACK_HOST = 10.0.0.11
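For most services the same idea also applies to the ``[keystone_authtoken]``
section of their configuration files. The exact option names vary by
release; the following is only an illustrative sketch using the virtual IP
from the examples above:

.. code-block:: ini

   [keystone_authtoken]
   auth_uri = http://10.0.0.11:5000/
   identity_uri = http://10.0.0.11:35357/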
@ -1,21 +0,0 @@
===================
Memcached
===================

Memcached is a general-purpose distributed memory caching system. It
is used to speed up dynamic database-driven websites by caching data
and objects in RAM to reduce the number of times an external data
source must be read.

Memcached is a memory cache daemon that can be used by most OpenStack
services to store ephemeral data, such as tokens.

Access to memcached is not handled by HAProxy because replicated
access is currently only in an experimental state. Instead, OpenStack
services must be supplied with the full list of hosts running
memcached.

The Memcached client implements hashing to balance objects among the
instances. Failure of an instance only impacts a percentage of the
objects, and the client automatically removes it from the list of
instances. The SLA is several minutes.
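In practice, supplying that list means setting the memcached server list in
each service's configuration. The following is only an illustrative sketch;
the option lives in different sections depending on the service and release:

.. code-block:: ini

   memcached_servers = controller1:11211,controller2:11211,controller3:11211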
@ -1,633 +0,0 @@
=======================
Pacemaker cluster stack
=======================

`Pacemaker <http://clusterlabs.org/>`_ cluster stack is the state-of-the-art
high availability and load balancing stack for the Linux platform.
Pacemaker is useful to make OpenStack infrastructure highly available.
Also, it is storage and application-agnostic, and in no way
specific to OpenStack.

Pacemaker relies on the
`Corosync <http://corosync.github.io/corosync/>`_ messaging layer
for reliable cluster communications.
Corosync implements the Totem single-ring ordering and membership protocol.
It also provides UDP and InfiniBand based messaging,
quorum, and cluster membership to Pacemaker.

Pacemaker does not inherently (need or want to) understand the
applications it manages. Instead, it relies on resource agents (RAs),
scripts that encapsulate the knowledge of how to start, stop, and
check the health of each application managed by the cluster.

These agents must conform to one of the `OCF <https://github.com/ClusterLabs/
OCF-spec/blob/master/ra/resource-agent-api.md>`_,
`SysV Init <http://refspecs.linux-foundation.org/LSB_3.0.0/LSB-Core-generic/
LSB-Core-generic/iniscrptact.html>`_, Upstart, or Systemd standards.

Pacemaker ships with a large set of OCF agents (such as those managing
MySQL databases, virtual IP addresses, and RabbitMQ), but can also use
any agents already installed on your system and can be extended with
your own (see the
`developer guide <http://www.linux-ha.org/doc/dev-guides/ra-dev-guide.html>`_).

The steps to implement the Pacemaker cluster stack are:

- :ref:`pacemaker-install`
- :ref:`pacemaker-corosync-setup`
- :ref:`pacemaker-corosync-start`
- :ref:`pacemaker-start`
- :ref:`pacemaker-cluster-properties`
.. _pacemaker-install:

Install packages
~~~~~~~~~~~~~~~~

On any host that is meant to be part of a Pacemaker cluster,
you must first establish cluster communications
through the Corosync messaging layer.
This involves installing the following packages
(and their dependencies, which your package manager
usually installs automatically):

- pacemaker
- pcs (CentOS or RHEL) or crmsh
- corosync
- fence-agents (CentOS or RHEL) or cluster-glue
- resource-agents
- libqb0
.. _pacemaker-corosync-setup:

Set up the cluster with `pcs`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#. Make sure pcs is running and configured to start at boot time:

   .. code-block:: console

      $ systemctl enable pcsd
      $ systemctl start pcsd

#. Set a password for hacluster user **on each host**.

   Since the cluster is a single administrative domain, it is generally
   accepted to use the same password on all nodes.

   .. code-block:: console

      $ echo my-secret-password-no-dont-use-this-one \
        | passwd --stdin hacluster

#. Use that password to authenticate to the nodes which will
   make up the cluster. The :option:`-p` option is used to give
   the password on command line and makes it easier to script.

   .. code-block:: console

      $ pcs cluster auth controller1 controller2 controller3 \
        -u hacluster -p my-secret-password-no-dont-use-this-one --force

#. Create the cluster, giving it a name, and start it:

   .. code-block:: console

      $ pcs cluster setup --force --name my-first-openstack-cluster \
        controller1 controller2 controller3
      $ pcs cluster start --all

.. note ::

   In Red Hat Enterprise Linux or CentOS environments, this is a recommended
   path to perform configuration. For more information, see the `RHEL docs
   <https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/7/html/High_Availability_Add-On_Reference/ch-clusteradmin-HAAR.html#s1-clustercreate-HAAR>`_.
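At this point the cluster should be up and quorate. A quick way to verify
this is to ask ``pcs`` for the cluster state on any node; the node names
shown follow the examples above:

.. code-block:: console

   $ pcs status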
Set up the cluster with `crmsh`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

After installing the Corosync package, you must create
the :file:`/etc/corosync/corosync.conf` configuration file.

.. note::

   For Ubuntu, you should also enable the Corosync service
   in the ``/etc/default/corosync`` configuration file.

Corosync can be configured to work
with either multicast or unicast IP addresses
or to use the votequorum library.

- :ref:`corosync-multicast`
- :ref:`corosync-unicast`
- :ref:`corosync-votequorum`
.. _corosync-multicast:

Set up Corosync with multicast
------------------------------

Most distributions ship an example configuration file
(:file:`corosync.conf.example`)
as part of the documentation bundled with the Corosync package.
An example Corosync configuration file is shown below:

**Example Corosync configuration file for multicast (corosync.conf)**

.. code-block:: ini

   totem {
     version: 2

     # Time (in ms) to wait for a token (1)
     token: 10000

     # How many token retransmits before forming a new
     # configuration
     token_retransmits_before_loss_const: 10

     # Turn off the virtual synchrony filter
     vsftype: none

     # Enable encryption (2)
     secauth: on

     # How many threads to use for encryption/decryption
     threads: 0

     # This specifies the redundant ring protocol, which may be
     # none, active, or passive. (3)
     rrp_mode: active

     # The following is a two-ring multicast configuration. (4)
     interface {
       ringnumber: 0
       bindnetaddr: 10.0.0.0
       mcastaddr: 239.255.42.1
       mcastport: 5405
     }
     interface {
       ringnumber: 1
       bindnetaddr: 10.0.42.0
       mcastaddr: 239.255.42.2
       mcastport: 5405
     }
   }

   amf {
     mode: disabled
   }

   service {
     # Load the Pacemaker Cluster Resource Manager (5)
     ver: 1
     name: pacemaker
   }

   aisexec {
     user: root
     group: root
   }

   logging {
     fileline: off
     to_stderr: yes
     to_logfile: no
     to_syslog: yes
     syslog_facility: daemon
     debug: off
     timestamp: on
     logger_subsys {
       subsys: AMF
       debug: off
       tags: enter|leave|trace1|trace2|trace3|trace4|trace6
     }
   }
Note the following:

- The ``token`` value specifies the time, in milliseconds,
  during which the Corosync token is expected
  to be transmitted around the ring.
  When this timeout expires, the token is declared lost,
  and after ``token_retransmits_before_loss_const`` lost tokens,
  the non-responding processor (cluster node) is declared dead.
  In other words, ``token × token_retransmits_before_loss_const``
  is the maximum time a node is allowed to not respond to cluster messages
  before being considered dead.
  The default for token is 1000 milliseconds (1 second),
  with 4 allowed retransmits.
  These defaults are intended to minimize failover times,
  but can cause frequent "false alarms" and unintended failovers
  in case of short network interruptions. The values used here are safer,
  albeit with slightly extended failover times.

- With ``secauth`` enabled,
  Corosync nodes mutually authenticate using a 128-byte shared secret
  stored in the :file:`/etc/corosync/authkey` file,
  which may be generated with the :command:`corosync-keygen` utility.
  When using ``secauth``, cluster communications are also encrypted.

- In Corosync configurations using redundant networking
  (with more than one interface),
  you must select a Redundant Ring Protocol (RRP) mode other than none.
  ``active`` is the recommended RRP mode.

Note the following about the recommended interface configuration:

- Each configured interface must have a unique ``ringnumber``,
  starting with 0.

- The ``bindnetaddr`` is the network address of the interfaces to bind to.
  The example uses two network addresses of /24 IPv4 subnets.

- Multicast groups (``mcastaddr``) must not be reused
  across cluster boundaries.
  In other words, no two distinct clusters
  should ever use the same multicast group.
  Be sure to select multicast addresses compliant with
  `RFC 2365, "Administratively Scoped IP Multicast"
  <http://www.ietf.org/rfc/rfc2365.txt>`_.

- For firewall configurations,
  note that Corosync communicates over UDP only,
  and uses ``mcastport`` (for receives)
  and ``mcastport - 1`` (for sends).

- The service declaration for the pacemaker service
  may be placed in the :file:`corosync.conf` file directly
  or in its own separate file, :file:`/etc/corosync/service.d/pacemaker`.

  .. note::

     If you are using Corosync version 2 on Ubuntu 14.04,
     remove or comment out lines under the service stanza,
     which enables Pacemaker to start up. Another potential
     problem is the boot and shutdown order of Corosync and
     Pacemaker. To force Pacemaker to start after Corosync and
     stop before Corosync, fix the start and kill symlinks manually:

     .. code-block:: console

        # update-rc.d pacemaker start 20 2 3 4 5 . stop 00 0 1 6 .

     The Pacemaker service also requires an additional
     configuration file ``/etc/corosync/uidgid.d/pacemaker``
     to be created with the following content:

     .. code-block:: ini

        uidgid {
          uid: hacluster
          gid: haclient
        }

- Once created, the :file:`corosync.conf` file
  (and the :file:`authkey` file if the secauth option is enabled)
  must be synchronized across all cluster nodes.
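Generating and distributing the shared secret is typically a manual two-step
task. A minimal sketch, assuming SSH access between controllers and the host
names used elsewhere in this guide:

.. code-block:: console

   # corosync-keygen
   # scp /etc/corosync/authkey controller2:/etc/corosync/authkey
   # scp /etc/corosync/authkey controller3:/etc/corosync/authkey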
.. _corosync-unicast:

Set up Corosync with unicast
----------------------------

For environments that do not support multicast,
Corosync should be configured for unicast.
An example fragment of the :file:`corosync.conf` file
for unicast is shown below:

**Corosync configuration file fragment for unicast (corosync.conf)**

.. code-block:: ini

   totem {
     #...
     interface {
       ringnumber: 0
       bindnetaddr: 10.0.0.0
       broadcast: yes (1)
       mcastport: 5405
     }
     interface {
       ringnumber: 1
       bindnetaddr: 10.0.42.0
       broadcast: yes
       mcastport: 5405
     }
     transport: udpu (2)
   }

   nodelist { (3)
     node {
       ring0_addr: 10.0.0.12
       ring1_addr: 10.0.42.12
       nodeid: 1
     }
     node {
       ring0_addr: 10.0.0.13
       ring1_addr: 10.0.42.13
       nodeid: 2
     }
     node {
       ring0_addr: 10.0.0.14
       ring1_addr: 10.0.42.14
       nodeid: 3
     }
   }
   #...

Note the following:

- If the ``broadcast`` parameter is set to yes,
  the broadcast address is used for communication.
  If this option is set, the ``mcastaddr`` parameter should not be set.

- The ``transport`` directive controls the transport mechanism used.
  To avoid the use of multicast entirely,
  specify the ``udpu`` unicast transport parameter.
  This requires specifying the list of members
  in the ``nodelist`` directive;
  this could potentially make up the membership before deployment.
  The default is ``udp``.
  The transport type can also be set to ``udpu`` or ``iba``.

- Within the ``nodelist`` directive,
  it is possible to specify specific information
  about the nodes in the cluster.
  The directive can contain only the node sub-directive,
  which specifies every node that should be a member of the membership,
  and where non-default options are needed.
  Every node must have at least the ``ring0_addr`` field filled.

  .. note::

     For UDPU, every node that should be a member
     of the membership must be specified.

  Possible options are:

  - ``ring{X}_addr`` specifies the IP address of one of the nodes.
    {X} is the ring number.

  - ``nodeid`` is optional
    when using IPv4 and required when using IPv6.
    This is a 32-bit value specifying the node identifier
    delivered to the cluster membership service.
    If this is not specified with IPv4,
    the node id is determined from the 32-bit IP address
    of the system to which the system is bound with ring identifier of 0.
    The node identifier value of zero is reserved and should not be used.
.. _corosync-votequorum:

Set up Corosync with votequorum library
---------------------------------------

The votequorum library is part of the corosync project.
It provides an interface to the vote-based quorum service
and it must be explicitly enabled in the Corosync configuration file.
The main role of votequorum library is to avoid split-brain situations,
but it also provides a mechanism to:

- Query the quorum status
- Get a list of nodes known to the quorum service
- Receive notifications of quorum state changes
- Change the number of votes assigned to a node
- Change the number of expected votes for a cluster to be quorate
- Connect an additional quorum device
  to allow small clusters to remain quorate during node outages

The votequorum library has been created to replace and eliminate
qdisk, the disk-based quorum daemon for CMAN,
from advanced cluster configurations.

A sample votequorum service configuration
in the :file:`corosync.conf` file is:

.. code-block:: ini

   quorum {
     provider: corosync_votequorum (1)
     expected_votes: 7 (2)
     wait_for_all: 1 (3)
     last_man_standing: 1 (4)
     last_man_standing_window: 10000 (5)
   }

Note the following:

- Specifying ``corosync_votequorum`` enables the votequorum library;
  this is the only required option.

- The cluster is fully operational with ``expected_votes`` set to 7 nodes
  (each node has 1 vote), quorum: 4.
  If a list of nodes is specified as ``nodelist``,
  the ``expected_votes`` value is ignored.

- Setting ``wait_for_all`` to 1 means that,
  when starting up a cluster (all nodes down),
  the cluster quorum is held until all nodes are online
  and have joined the cluster for the first time.
  This parameter is new in Corosync 2.0.

- Setting ``last_man_standing`` to 1 enables
  the Last Man Standing (LMS) feature;
  by default, it is disabled (set to 0).
  If a cluster is on the quorum edge
  (``expected_votes:`` set to 7; ``online nodes:`` set to 4)
  for longer than the time specified
  for the ``last_man_standing_window`` parameter,
  the cluster can recalculate quorum and continue operating
  even if the next node will be lost.
  This logic is repeated until the number of online nodes
  in the cluster reaches 2.
  In order to allow the cluster to step down from 2 members to only 1,
  the ``auto_tie_breaker`` parameter needs to be set;
  this is not recommended for production environments.

- ``last_man_standing_window`` specifies the time, in milliseconds,
  required to recalculate quorum after one or more hosts
  have been lost from the cluster.
  To do the new quorum recalculation,
  the cluster must have quorum for at least the interval
  specified for ``last_man_standing_window``;
  the default is 10000ms.
.. _pacemaker-corosync-start:

Start Corosync
--------------

``Corosync`` is started as a regular system service.
Depending on your distribution, it may ship with an LSB init script,
an upstart job, or a systemd unit file.
Either way, the service is usually named ``corosync``:

- To start ``corosync`` with the LSB init script:

  .. code-block:: console

     # /etc/init.d/corosync start

- Alternatively:

  .. code-block:: console

     # service corosync start

- To start ``corosync`` with upstart:

  .. code-block:: console

     # start corosync

- To start ``corosync`` with systemd unit file:

  .. code-block:: console

     # systemctl start corosync

You can now check the ``corosync`` connectivity with one of these tools.

Use the :command:`corosync-cfgtool` utility with the :option:`-s` option
to get a summary of the health of the communication rings:

.. code-block:: console

   # corosync-cfgtool -s
   Printing ring status.
   Local node ID 435324542
   RING ID 0
           id      = 10.0.0.82
           status  = ring 0 active with no faults
   RING ID 1
           id      = 10.0.42.100
           status  = ring 1 active with no faults

Use the :command:`corosync-objctl` utility
to dump the Corosync cluster member list:

.. code-block:: console

   # corosync-objctl runtime.totem.pg.mrp.srp.members
   runtime.totem.pg.mrp.srp.435324542.ip=r(0) ip(10.0.0.82) r(1) ip(10.0.42.100)
   runtime.totem.pg.mrp.srp.435324542.join_count=1
   runtime.totem.pg.mrp.srp.435324542.status=joined
   runtime.totem.pg.mrp.srp.983895584.ip=r(0) ip(10.0.0.87) r(1) ip(10.0.42.254)
   runtime.totem.pg.mrp.srp.983895584.join_count=1
   runtime.totem.pg.mrp.srp.983895584.status=joined

You should see a ``status=joined`` entry
for each of your constituent cluster nodes.

[TODO: Should the main example now use corosync-cmapctl and have the note
give the command for Corosync version 1?]

.. note::

   If you are using Corosync version 2, use the :command:`corosync-cmapctl`
   utility instead of :command:`corosync-objctl`; it is a direct replacement.
.. _pacemaker-start:
|
|
||||||
|
|
||||||
Start Pacemaker
|
|
||||||
---------------
|
|
||||||
|
|
||||||
After the ``corosync`` service have been started
|
|
||||||
and you have verified that the cluster is communicating properly,
|
|
||||||
you can start :command:`pacemakerd`, the Pacemaker master control process.
|
|
||||||
Choose one from the following four ways to start it:
|
|
||||||
|
|
||||||
- To start ``pacemaker`` with the LSB init script:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# /etc/init.d/pacemaker start
|
|
||||||
|
|
||||||
- Alternatively:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# service pacemaker start
|
|
||||||
|
|
||||||
- To start ``pacemaker`` with upstart:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# start pacemaker
|
|
||||||
|
|
||||||
- To start ``pacemaker`` with the systemd unit file:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# systemctl start pacemaker
|
|
||||||
|
|
||||||
After the ``pacemaker`` service has started,
|
|
||||||
Pacemaker creates a default empty cluster configuration with no resources.
|
|
||||||
Use the :command:`crm_mon` utility to observe the status of ``pacemaker``:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
============
|
|
||||||
Last updated: Sun Oct 7 21:07:52 2012
|
|
||||||
Last change: Sun Oct 7 20:46:00 2012 via cibadmin on controller2
|
|
||||||
Stack: openais
|
|
||||||
Current DC: controller2 - partition with quorum
|
|
||||||
Version: 1.1.6-9971ebba4494012a93c03b40a2c58ec0eb60f50c
|
|
||||||
3 Nodes configured, 3 expected votes
|
|
||||||
0 Resources configured.
|
|
||||||
============
|
|
||||||
|
|
||||||
Online: [ controller3 controller2 controller1 ]
|
|
||||||
|
|
||||||
.. _pacemaker-cluster-properties:
|
|
||||||
|
|
||||||
Set basic cluster properties
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
After you set up your Pacemaker cluster,
|
|
||||||
you should set a few basic cluster properties:
|
|
||||||
|
|
||||||
``crmsh``
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ crm configure property pe-warn-series-max="1000" \
|
|
||||||
pe-input-series-max="1000" \
|
|
||||||
pe-error-series-max="1000" \
|
|
||||||
cluster-recheck-interval="5min"
|
|
||||||
|
|
||||||
``pcs``
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ pcs property set pe-warn-series-max=1000 \
|
|
||||||
pe-input-series-max=1000 \
|
|
||||||
pe-error-series-max=1000 \
|
|
||||||
cluster-recheck-interval=5min
|
|
||||||
|
|
||||||
Note the following:
|
|
||||||
|
|
||||||
- Setting the ``pe-warn-series-max``, ``pe-input-series-max``
|
|
||||||
and ``pe-error-series-max`` parameters to 1000
|
|
||||||
instructs Pacemaker to keep a longer history of the inputs processed
|
|
||||||
and errors and warnings generated by its Policy Engine.
|
|
||||||
This history is useful if you need to troubleshoot the cluster.
|
|
||||||
|
|
||||||
- Pacemaker uses an event-driven approach to cluster state processing.
|
|
||||||
The ``cluster-recheck-interval`` parameter (which defaults to 15 minutes)
|
|
||||||
defines the interval at which certain Pacemaker actions occur.
|
|
||||||
It is usually prudent to reduce this to a shorter interval,
|
|
||||||
such as 5 or 3 minutes.
|
|
||||||
|
|
||||||
After you make these changes, you may commit the updated configuration.
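If you want to confirm that these properties were stored, you can list the
current cluster properties with either tool; this is only a verification
sketch, assuming the same ``crmsh`` and ``pcs`` commands shown above:

.. code-block:: console

   $ crm configure show
   $ pcs property list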
|
|
@ -1,310 +0,0 @@
|
|||||||
========
|
|
||||||
RabbitMQ
|
|
||||||
========
|
|
||||||
|
|
||||||
An AMQP (Advanced Message Queuing Protocol) compliant message bus is
|
|
||||||
required for most OpenStack components in order to coordinate the
|
|
||||||
execution of jobs entered into the system.
|
|
||||||
|
|
||||||
The most popular AMQP implementation used in OpenStack installations
|
|
||||||
is RabbitMQ.
|
|
||||||
|
|
||||||
RabbitMQ nodes fail over both on the application and the
|
|
||||||
infrastructure layers.
|
|
||||||
|
|
||||||
The application layer is controlled by the ``oslo.messaging``
|
|
||||||
configuration options for multiple AMQP hosts. If the AMQP node fails,
|
|
||||||
the application reconnects to the next one configured within the
|
|
||||||
specified reconnect interval. This reconnect interval
|
|
||||||
constitutes the SLA of the application layer.
|
|
||||||
|
|
||||||
On the infrastructure layer, the SLA is the time it takes for the RabbitMQ
|
|
||||||
cluster to reassemble. Several cases are possible. The Mnesia keeper
|
|
||||||
node is the master of the corresponding Pacemaker resource for
|
|
||||||
RabbitMQ; when it fails, the result is a full AMQP cluster downtime
|
|
||||||
interval. Normally, its SLA is no more than several minutes. Failure
|
|
||||||
of another node that is a slave of the corresponding Pacemaker
|
|
||||||
resource for RabbitMQ results in no AMQP cluster downtime at all.
|
|
||||||
|
|
||||||
Making the RabbitMQ service highly available involves the following steps:
|
|
||||||
|
|
||||||
- :ref:`Install RabbitMQ<rabbitmq-install>`
|
|
||||||
|
|
||||||
- :ref:`Configure RabbitMQ for HA queues<rabbitmq-configure>`
|
|
||||||
|
|
||||||
- :ref:`Configure OpenStack services to use Rabbit HA queues
|
|
||||||
<rabbitmq-services>`
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
Access to RabbitMQ is not normally handled by HAProxy. Instead,
|
|
||||||
consumers must be supplied with the full list of hosts running
|
|
||||||
RabbitMQ with ``rabbit_hosts``, and must turn on the ``rabbit_ha_queues``
|
|
||||||
option.
|
|
||||||
|
|
||||||
Jon Eck found the `core issue
|
|
||||||
<http://people.redhat.com/jeckersb/private/vip-failover-tcp-persist.html>`_
|
|
||||||
and went into some detail regarding the `history and solution
|
|
||||||
<http://john.eckersberg.com/improving-ha-failures-with-tcp-timeouts.html>`_
|
|
||||||
on his blog.
|
|
||||||
|
|
||||||
In summary though:
|
|
||||||
|
|
||||||
The source address for the connection from HAProxy back to the
|
|
||||||
client is the VIP address. However the VIP address is no longer
|
|
||||||
present on the host. This means that the network (IP) layer
|
|
||||||
deems the packet unroutable, and informs the transport (TCP)
|
|
||||||
layer. TCP, however, is a reliable transport. It knows how to
|
|
||||||
handle transient errors and will retry. And so it does.
|
|
||||||
|
|
||||||
In this case that is a problem though, because:
|
|
||||||
|
|
||||||
TCP generally holds on to hope for a long time. A ballpark
|
|
||||||
estimate is somewhere on the order of tens of minutes (30
|
|
||||||
minutes is commonly referenced). During this time it will keep
|
|
||||||
probing and trying to deliver the data.
|
|
||||||
|
|
||||||
It is important to note that HAProxy has no idea that any of this is
|
|
||||||
happening. As far as its process is concerned, it called
|
|
||||||
``write()`` with the data and the kernel returned success. The
|
|
||||||
resolution is already understood and just needs to make its way
|
|
||||||
through a review.
|
|
||||||
|
|
||||||
.. _rabbitmq-install:
|
|
||||||
|
|
||||||
Install RabbitMQ
|
|
||||||
~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
The commands for installing RabbitMQ are specific to the Linux distribution
|
|
||||||
you are using:
|
|
||||||
|
|
||||||
.. list-table:: Install RabbitMQ
|
|
||||||
:widths: 15 30
|
|
||||||
:header-rows: 1
|
|
||||||
|
|
||||||
* - Distribution
|
|
||||||
- Command
|
|
||||||
* - Ubuntu, Debian
|
|
||||||
- :command:`# apt-get install rabbitmq-server`
|
|
||||||
* - RHEL, Fedora, CentOS
|
|
||||||
- :command:`# yum install rabbitmq-server`
|
|
||||||
* - openSUSE
|
|
||||||
- :command:`# zypper install rabbitmq-server`
|
|
||||||
* - SLES 12
|
|
||||||
- :command:`# zypper addrepo -f obs://Cloud:OpenStack:Kilo/SLE_12 Kilo`
|
|
||||||
|
|
||||||
[Verify fingerprint of imported GPG key; see below]
|
|
||||||
|
|
||||||
:command:`# zypper install rabbitmq-server`
|
|
||||||
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
For SLES 12, the packages are signed by GPG key 893A90DAD85F9316.
|
|
||||||
You should verify the fingerprint of the imported GPG key before using it.
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
Key ID: 893A90DAD85F9316
|
|
||||||
Key Name: Cloud:OpenStack OBS Project <Cloud:OpenStack@build.opensuse.org>
|
|
||||||
Key Fingerprint: 35B34E18ABC1076D66D5A86B893A90DAD85F9316
|
|
||||||
Key Created: Tue Oct 8 13:34:21 2013
|
|
||||||
Key Expires: Thu Dec 17 13:34:21 2015
|
|
||||||
|
|
||||||
For more information,
|
|
||||||
see the official installation manual for the distribution:
|
|
||||||
|
|
||||||
- `Debian and Ubuntu <http://www.rabbitmq.com/install-debian.html>`_
|
|
||||||
- `RPM based <http://www.rabbitmq.com/install-rpm.html>`_
|
|
||||||
(RHEL, Fedora, CentOS, openSUSE)
|
|
||||||
|
|
||||||
.. _rabbitmq-configure:
|
|
||||||
|
|
||||||
Configure RabbitMQ for HA queues
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
[TODO: This section should begin with a brief mention
|
|
||||||
about what HA queues are and why they are valuable, etc]
|
|
||||||
|
|
||||||
We are building a cluster of RabbitMQ nodes to construct a RabbitMQ broker,
|
|
||||||
which is a logical grouping of several Erlang nodes.
|
|
||||||
|
|
||||||
The following components/services can work with HA queues:
|
|
||||||
|
|
||||||
[TODO: replace "currently" with specific release names]
|
|
||||||
|
|
||||||
[TODO: Does this list need to be updated? Perhaps we need a table
|
|
||||||
that shows each component and the earliest release that allows it
|
|
||||||
to work with HA queues.]
|
|
||||||
|
|
||||||
- OpenStack Compute
|
|
||||||
- OpenStack Block Storage
|
|
||||||
- OpenStack Networking
|
|
||||||
- Telemetry
|
|
||||||
|
|
||||||
We have to consider that, while exchanges and bindings
|
|
||||||
survive the loss of individual nodes,
|
|
||||||
queues and their messages do not
|
|
||||||
because a queue and its contents are located on one node.
|
|
||||||
If we lose this node, we also lose the queue.
|
|
||||||
|
|
||||||
Mirrored queues in RabbitMQ improve
|
|
||||||
the availability of the service, since they are resilient to failures.
|
|
||||||
|
|
||||||
Production servers should run (at least) three RabbitMQ servers;
|
|
||||||
for testing and demonstration purposes,
|
|
||||||
it is possible to run only two servers.
|
|
||||||
In this section, we configure two nodes,
|
|
||||||
called ``rabbit1`` and ``rabbit2``.
|
|
||||||
To build a broker, we need to ensure
|
|
||||||
that all nodes have the same Erlang cookie file.
|
|
||||||
|
|
||||||
[TODO: Should the example instead use a minimum of three nodes?]
|
|
||||||
|
|
||||||
#. To do so, stop RabbitMQ everywhere and copy the cookie
|
|
||||||
from the first node to each of the other node(s):
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# scp /var/lib/rabbitmq/.erlang.cookie root@NODE:/var/lib/rabbitmq/.erlang.cookie
|
|
||||||
|
|
||||||
#. On each target node, verify the correct owner,
|
|
||||||
group, and permissions of the file :file:`.erlang.cookie`.
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# chown rabbitmq:rabbitmq /var/lib/rabbitmq/.erlang.cookie
|
|
||||||
# chmod 400 /var/lib/rabbitmq/.erlang.cookie
|
|
||||||
|
|
||||||
#. Start the message queue service on all nodes and configure it to start
|
|
||||||
when the system boots.
|
|
||||||
|
|
||||||
On Ubuntu, it is configured by default.
|
|
||||||
|
|
||||||
On CentOS, RHEL, openSUSE, and SLES:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# systemctl enable rabbitmq-server.service
|
|
||||||
# systemctl start rabbitmq-server.service
|
|
||||||
|
|
||||||
#. Verify that the nodes are running:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# rabbitmqctl cluster_status
|
|
||||||
Cluster status of node rabbit@NODE...
|
|
||||||
[{nodes,[{disc,[rabbit@NODE]}]},
|
|
||||||
{running_nodes,[rabbit@NODE]},
|
|
||||||
{partitions,[]}]
|
|
||||||
...done.
|
|
||||||
|
|
||||||
#. Run the following commands on each node except the first one:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# rabbitmqctl stop_app
|
|
||||||
Stopping node rabbit@NODE...
|
|
||||||
...done.
|
|
||||||
# rabbitmqctl join_cluster --ram rabbit@rabbit1
|
|
||||||
# rabbitmqctl start_app
|
|
||||||
Starting node rabbit@NODE ...
|
|
||||||
...done.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
The default node type is a disc node. In this guide, nodes
|
|
||||||
join the cluster as RAM nodes.
|
|
||||||
|
|
||||||
#. To verify the cluster status:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# rabbitmqctl cluster_status
|
|
||||||
Cluster status of node rabbit@NODE...
|
|
||||||
[{nodes,[{disc,[rabbit@rabbit1]},{ram,[rabbit@NODE]}]}, \
|
|
||||||
{running_nodes,[rabbit@NODE,rabbit@rabbit1]}]
|
|
||||||
|
|
||||||
If the cluster is working,
|
|
||||||
you can create usernames and passwords for the queues.
|
|
||||||
|
|
||||||
#. To ensure that all queues except those with auto-generated names
|
|
||||||
are mirrored across all running nodes,
|
|
||||||
set the ``ha-mode`` policy key to ``all``
|
|
||||||
by running the following command on one of the nodes:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# rabbitmqctl set_policy ha-all '^(?!amq\.).*' '{"ha-mode": "all"}'
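To confirm that the policy was applied, you can list the configured policies;
this is a verification sketch and the output format depends on your RabbitMQ
version:

.. code-block:: console

   # rabbitmqctl list_policies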
|
|
||||||
|
|
||||||
More information is available in the RabbitMQ documentation:
|
|
||||||
|
|
||||||
- `Highly Available Queues <http://www.rabbitmq.com/ha.html>`_
|
|
||||||
- `Clustering Guide <https://www.rabbitmq.com/clustering.html>`_
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
As another option for making RabbitMQ highly available, RabbitMQ has shipped the
|
|
||||||
OCF scripts for the Pacemaker cluster resource agents since version 3.5.7.
|
|
||||||
These provide an active/active RabbitMQ cluster with mirrored queues.
|
|
||||||
For more information, see `Auto-configuration of a cluster with
|
|
||||||
Pacemaker <http://www.rabbitmq.com/pacemaker.html>`_.
|
|
||||||
|
|
||||||
.. _rabbitmq-services:
|
|
||||||
|
|
||||||
Configure OpenStack services to use Rabbit HA queues
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
We have to configure the OpenStack components
|
|
||||||
to use at least two RabbitMQ nodes.
|
|
||||||
|
|
||||||
Do this configuration on all services using RabbitMQ:
|
|
||||||
|
|
||||||
#. RabbitMQ HA cluster host:port pairs:
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
rabbit_hosts=rabbit1:5672,rabbit2:5672,rabbit3:5672
|
|
||||||
|
|
||||||
#. How frequently to retry connecting with RabbitMQ:
|
|
||||||
[TODO: document the unit of measure here? Seconds?]
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
rabbit_retry_interval=1
|
|
||||||
|
|
||||||
#. How long to back off between retries when connecting to RabbitMQ:
|
|
||||||
[TODO: document the unit of measure here? Seconds?]
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
rabbit_retry_backoff=2
|
|
||||||
|
|
||||||
#. Maximum number of retries when connecting to RabbitMQ (infinite by default):
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
rabbit_max_retries=0
|
|
||||||
|
|
||||||
#. Use durable queues in RabbitMQ:
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
rabbit_durable_queues=true
|
|
||||||
|
|
||||||
#. Use HA queues in RabbitMQ (x-ha-policy: all):
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
rabbit_ha_queues=true
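Putting the preceding options together, the relevant part of a service
configuration file might look like the following sketch. The host names are
examples, and in some releases these options belong in the
``oslo_messaging_rabbit`` section rather than ``DEFAULT``:

::

   rabbit_hosts=rabbit1:5672,rabbit2:5672,rabbit3:5672
   rabbit_retry_interval=1
   rabbit_retry_backoff=2
   rabbit_max_retries=0
   rabbit_durable_queues=true
   rabbit_ha_queues=true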
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
If you change the configuration from an old set-up
|
|
||||||
that did not use HA queues, you should restart the service:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# rabbitmqctl stop_app
|
|
||||||
# rabbitmqctl reset
|
|
||||||
# rabbitmqctl start_app
|
|
@ -1,78 +0,0 @@
|
|||||||
|
|
||||||
=========
|
|
||||||
Telemetry
|
|
||||||
=========
|
|
||||||
|
|
||||||
[TODO (Add Telemetry overview)]
|
|
||||||
|
|
||||||
Telemetry central agent
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
The Telemetry central agent can be configured to partition its polling
|
|
||||||
workload between multiple agents, enabling high availability.
|
|
||||||
|
|
||||||
Both the central and the compute agent can run in an HA deployment,
|
|
||||||
which means that multiple instances of these services can run in
|
|
||||||
parallel with workload partitioning among these running instances.
|
|
||||||
|
|
||||||
The `Tooz <https://pypi.python.org/pypi/tooz>`__ library provides
|
|
||||||
the coordination within the groups of service instances.
|
|
||||||
It provides an API above several back ends that can be used for building
|
|
||||||
distributed applications.
|
|
||||||
|
|
||||||
Tooz supports
|
|
||||||
`various drivers <http://docs.openstack.org/developer/tooz/drivers.html>`__
|
|
||||||
including the following back end solutions:
|
|
||||||
|
|
||||||
* `Zookeeper <http://zookeeper.apache.org/>`__.
|
|
||||||
Recommended solution by the Tooz project.
|
|
||||||
|
|
||||||
* `Redis <http://redis.io/>`__.
|
|
||||||
Recommended solution by the Tooz project.
|
|
||||||
|
|
||||||
* `Memcached <http://memcached.org/>`__.
|
|
||||||
Recommended for testing.
|
|
||||||
|
|
||||||
You must configure a supported Tooz driver for the HA deployment of
|
|
||||||
the Telemetry services.
|
|
||||||
|
|
||||||
For information about the required configuration options that have
|
|
||||||
to be set in the :file:`ceilometer.conf` configuration file for both
|
|
||||||
the central and compute agents, see the `coordination section
|
|
||||||
<http://docs.openstack.org/liberty/config-reference/content/
|
|
||||||
ch_configuring-openstack-telemetry.html>`__
|
|
||||||
in the OpenStack Configuration Reference.
|
|
||||||
|
|
||||||
.. note:: Without the ``backend_url`` option being set, only one
|
|
||||||
instance of both the central and compute agent service is able to run
|
|
||||||
and function correctly.
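As an illustrative sketch only (the host name is a placeholder and the full
option set is described in the coordination section referenced above),
enabling coordination in :file:`ceilometer.conf` amounts to pointing
``backend_url`` at a supported Tooz back end, for example Redis:

::

   [coordination]
   backend_url = redis://controller:6379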
|
|
||||||
|
|
||||||
The availability check of the instances is provided by heartbeat messages.
|
|
||||||
When the connection with an instance is lost, the workload will be
|
|
||||||
reassigned among the remaining instances in the next polling cycle.
|
|
||||||
|
|
||||||
.. note:: Memcached uses a timeout value, which should always be set to
|
|
||||||
a value that is higher than the heartbeat value set for Telemetry.
|
|
||||||
|
|
||||||
For backward compatibility and supporting existing deployments, the central
|
|
||||||
agent configuration also supports using different configuration files for
|
|
||||||
groups of service instances of this type that are running in parallel.
|
|
||||||
To enable this configuration, set a value for the ``partitioning_group_prefix``
|
|
||||||
option in the `central section <http://docs.openstack.org/liberty/
|
|
||||||
config-reference/content/ch_configuring-openstack-telemetry.html>`__
|
|
||||||
in the OpenStack Configuration Reference.
|
|
||||||
|
|
||||||
.. warning:: For each sub-group of the central agent pool with the same
|
|
||||||
``partitioning_group_prefix`` a disjoint subset of meters must be polled --
|
|
||||||
otherwise samples may be missing or duplicated. The list of meters to poll
|
|
||||||
can be set in the :file:`/etc/ceilometer/pipeline.yaml` configuration file.
|
|
||||||
For more information about pipelines see the `Data collection and
|
|
||||||
processing
|
|
||||||
<http://docs.openstack.org/admin-guide/telemetry-data-collection.html#data-collection-and-processing>`__
|
|
||||||
section.
|
|
||||||
|
|
||||||
To enable the compute agent to run multiple instances simultaneously with
|
|
||||||
workload partitioning, the ``workload_partitioning`` option has to be set to
|
|
||||||
``True`` under the `compute section <http://docs.openstack.org/liberty/
|
|
||||||
config-reference/content/ch_configuring-openstack-telemetry.html>`__
|
|
||||||
in the :file:`ceilometer.conf` configuration file.
|
|
@ -1,24 +0,0 @@
|
|||||||
|
|
||||||
=================
|
|
||||||
Configure the VIP
|
|
||||||
=================
|
|
||||||
|
|
||||||
You must select and assign a virtual IP address (VIP)
|
|
||||||
that can freely float between cluster nodes.
|
|
||||||
|
|
||||||
This configuration creates ``vip``,
|
|
||||||
a virtual IP address for use by the API node (``10.0.0.11``):
|
|
||||||
|
|
||||||
For ``crmsh``:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
primitive vip ocf:heartbeat:IPaddr2 \
|
|
||||||
params ip="10.0.0.11" cidr_netmask="24" op monitor interval="30s"
|
|
||||||
|
|
||||||
For ``pcs``:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
# pcs resource create vip ocf:heartbeat:IPaddr2 \
|
|
||||||
ip="10.0.0.11" cidr_netmask="24" op monitor interval="30s"
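To verify that the resource was created and has started on one of the
cluster nodes, either of the following checks can be used (a sketch; use
whichever tool matches your deployment):

.. code-block:: console

   # crm_mon -1
   # pcs status resources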
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
================================================
|
|
||||||
Configuring the controller for high availability
|
|
||||||
================================================
|
|
||||||
|
|
||||||
The cloud controller runs on the management network
|
|
||||||
and must talk to all other services.
|
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
:maxdepth: 2
|
|
||||||
|
|
||||||
controller-ha-pacemaker.rst
|
|
||||||
controller-ha-vip.rst
|
|
||||||
controller-ha-haproxy.rst
|
|
||||||
controller-ha-galera.rst
|
|
||||||
controller-ha-memcached.rst
|
|
||||||
controller-ha-rabbitmq.rst
|
|
||||||
controller-ha-keystone.rst
|
|
||||||
controller-ha-telemetry.rst
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,47 +0,0 @@
|
|||||||
|
|
||||||
==============
|
|
||||||
Hardware setup
|
|
||||||
==============
|
|
||||||
|
|
||||||
The standard hardware requirements:
|
|
||||||
|
|
||||||
- `Provider networks <http://docs.openstack.org/liberty/install-guide-ubuntu/overview.html#networking-option-1-provider-networks>`_
|
|
||||||
- `Self-service networks <http://docs.openstack.org/liberty/install-guide-ubuntu/overview.html#networking-option-2-self-service-networks>`_
|
|
||||||
|
|
||||||
However, OpenStack does not require a significant amount of resources
|
|
||||||
and the following minimum requirements should support
|
|
||||||
a proof-of-concept high availability environment
|
|
||||||
with core services and several instances:
|
|
||||||
|
|
||||||
[TODO: Verify that these numbers are good]
|
|
||||||
|
|
||||||
+-------------------+------------+----------+-----------+------+
|
|
||||||
| Node type | Processor | Memory | Storage | NIC |
|
|
||||||
+===================+============+==========+===========+======+
|
|
||||||
| controller node | 1-2 | 8 GB | 100 GB | 2 |
|
|
||||||
+-------------------+------------+----------+-----------+------+
|
|
||||||
| compute node | 2-4+ | 8+ GB | 100+ GB | 2 |
|
|
||||||
+-------------------+------------+----------+-----------+------+
|
|
||||||
|
|
||||||
|
|
||||||
For demonstrations and studying,
|
|
||||||
you can set up a test environment on virtual machines (VMs).
|
|
||||||
This has the following benefits:
|
|
||||||
|
|
||||||
- One physical server can support multiple nodes,
|
|
||||||
each of which supports almost any number of network interfaces.
|
|
||||||
|
|
||||||
- Ability to take periodic "snapshots" throughout the installation process
|
|
||||||
and "roll back" to a working configuration in the event of a problem.
|
|
||||||
|
|
||||||
However, running an OpenStack environment on VMs
|
|
||||||
degrades the performance of your instances,
|
|
||||||
particularly if your hypervisor and/or processor lacks support
|
|
||||||
for hardware acceleration of nested VMs.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
When installing highly-available OpenStack on VMs,
|
|
||||||
be sure that your hypervisor permits promiscuous mode
|
|
||||||
and disables MAC address filtering on the external network.
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
|||||||
|
|
||||||
=============================================
|
|
||||||
Hardware considerations for high availability
|
|
||||||
=============================================
|
|
||||||
|
|
||||||
[TODO: Provide a minimal architecture example for HA,
|
|
||||||
expanded on that given in
|
|
||||||
http://docs.openstack.org/liberty/install-guide-ubuntu/environment.html
|
|
||||||
for easy comparison]
|
|
||||||
|
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
:maxdepth: 2
|
|
||||||
|
|
||||||
hardware-ha-basic.rst
|
|
@ -1,43 +0,0 @@
|
|||||||
=================================
|
|
||||||
OpenStack High Availability Guide
|
|
||||||
=================================
|
|
||||||
|
|
||||||
Abstract
|
|
||||||
~~~~~~~~
|
|
||||||
|
|
||||||
This guide describes how to install and configure
|
|
||||||
OpenStack for high availability.
|
|
||||||
It supplements the OpenStack Installation Guides
|
|
||||||
and assumes that you are familiar with the material in those guides.
|
|
||||||
|
|
||||||
This guide documents OpenStack Mitaka, OpenStack Liberty, and OpenStack
|
|
||||||
Kilo releases.
|
|
||||||
|
|
||||||
.. warning:: This guide is a work-in-progress and changing rapidly
|
|
||||||
while we continue to test and enhance the guidance. Please note
|
|
||||||
where there are open "to do" items and help where you are able.
|
|
||||||
|
|
||||||
Contents
|
|
||||||
~~~~~~~~
|
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
:maxdepth: 2
|
|
||||||
|
|
||||||
common/conventions.rst
|
|
||||||
intro-ha.rst
|
|
||||||
hardware-ha.rst
|
|
||||||
install-ha.rst
|
|
||||||
networking-ha.rst
|
|
||||||
controller-ha.rst
|
|
||||||
storage-ha.rst
|
|
||||||
compute-node-ha.rst
|
|
||||||
noncore-ha.rst
|
|
||||||
|
|
||||||
|
|
||||||
common/app_support.rst
|
|
||||||
common/glossary.rst
|
|
||||||
|
|
||||||
Search in this guide
|
|
||||||
~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
* :ref:`search`
|
|
@ -1,42 +0,0 @@
|
|||||||
|
|
||||||
=================
|
|
||||||
Install memcached
|
|
||||||
=================
|
|
||||||
|
|
||||||
[TODO: Verify that Oslo supports hash synchronization;
|
|
||||||
if so, this should not take more than load balancing.]
|
|
||||||
|
|
||||||
[TODO: This hands off to two different docs for install information.
|
|
||||||
We should choose one or explain the specific purpose of each.]
|
|
||||||
|
|
||||||
Most OpenStack services can use memcached
|
|
||||||
to store ephemeral data such as tokens.
|
|
||||||
Although memcached does not support
|
|
||||||
typical forms of redundancy such as clustering,
|
|
||||||
OpenStack services can use almost any number of instances
|
|
||||||
by configuring multiple hostnames or IP addresses.
|
|
||||||
The memcached client implements hashing
|
|
||||||
to balance objects among the instances.
|
|
||||||
Failure of an instance only impacts a percentage of the objects
|
|
||||||
and the client automatically removes it from the list of instances.
|
|
||||||
|
|
||||||
To install and configure memcached, read the
|
|
||||||
`official documentation <https://code.google.com/p/memcached/wiki/NewStart>`_.
|
|
||||||
|
|
||||||
Memory caching is managed by `oslo.cache
|
|
||||||
<http://specs.openstack.org/openstack/oslo-specs/specs/kilo/oslo-cache-using-dogpile.html>`_
|
|
||||||
so the way to use multiple memcached servers is the same for all projects.
|
|
||||||
|
|
||||||
[TODO: Should this show three hosts?]
|
|
||||||
|
|
||||||
Example configuration with two hosts:
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
memcached_servers = controller1:11211,controller2:11211
|
|
||||||
|
|
||||||
By default, ``controller1`` handles the caching service. If that host
|
|
||||||
goes down, ``controller2`` takes over.
|
|
||||||
For more information about memcached installation,
|
|
||||||
see the `OpenStack Administrator Guide
|
|
||||||
<http://docs.openstack.org/admin-guide/>`_.
|
|
@ -1,9 +0,0 @@
|
|||||||
=============
|
|
||||||
Configure NTP
|
|
||||||
=============
|
|
||||||
|
|
||||||
You must configure NTP to properly synchronize services among nodes.
|
|
||||||
We recommend that you configure the controller node to reference
|
|
||||||
more accurate (lower stratum) servers and other nodes to reference
|
|
||||||
the controller node. For more information, see the
|
|
||||||
`Install Guides <http://docs.openstack.org/#install-guides>`_.
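As a sketch of that layout (assuming ``chrony``; configuration file locations
and the upstream pool name are placeholders that vary by distribution), the
controller and the other nodes might carry entries such as:

::

   # On the controller node
   server pool.ntp.org iburst
   allow 10.0.0.0/24

   # On all other nodes
   server controller iburst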
|
|
@ -1,24 +0,0 @@
|
|||||||
=====================================
|
|
||||||
Install operating system on each node
|
|
||||||
=====================================
|
|
||||||
|
|
||||||
The first step in setting up your highly-available OpenStack cluster
|
|
||||||
is to install the operating system on each node.
|
|
||||||
Follow the instructions in the OpenStack Installation Guides:
|
|
||||||
|
|
||||||
- `CentOS and RHEL <http://docs.openstack.org/liberty/install-guide-rdo/environment.html>`_
|
|
||||||
- `openSUSE and SUSE Linux Enterprise Server <http://docs.openstack.org/liberty/install-guide-obs/environment.html>`_
|
|
||||||
- `Ubuntu <http://docs.openstack.org/liberty/install-guide-ubuntu/environment.html>`_
|
|
||||||
|
|
||||||
The OpenStack Installation Guides also include a list of the services
|
|
||||||
that use passwords with important notes about using them.
|
|
||||||
|
|
||||||
This guide uses the following example IP addresses:
|
|
||||||
|
|
||||||
.. code-block:: none
|
|
||||||
|
|
||||||
# controller
|
|
||||||
10.0.0.11 controller # virtual IP
|
|
||||||
10.0.0.12 controller1
|
|
||||||
10.0.0.13 controller2
|
|
||||||
10.0.0.14 controller3
|
|
@ -1,12 +0,0 @@
|
|||||||
=====================================
|
|
||||||
Installing high availability packages
|
|
||||||
=====================================
|
|
||||||
|
|
||||||
[TODO -- write intro to this section]
|
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
:maxdepth: 2
|
|
||||||
|
|
||||||
install-ha-os.rst
|
|
||||||
install-ha-memcached.rst
|
|
||||||
install-ha-ntp.rst
|
|
@ -1,96 +0,0 @@
|
|||||||
============================
|
|
||||||
The keepalived architecture
|
|
||||||
============================
|
|
||||||
|
|
||||||
High availability strategies
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
The following diagram shows a very simplified view of the different
|
|
||||||
strategies used to achieve high availability for the OpenStack
|
|
||||||
services:
|
|
||||||
|
|
||||||
.. image:: /figures/keepalived-arch.jpg
|
|
||||||
:width: 100%
|
|
||||||
|
|
||||||
Depending on the method used to communicate with the service, the
|
|
||||||
following availability strategies will be followed:
|
|
||||||
|
|
||||||
- Keepalived, for the HAProxy instances.
|
|
||||||
- Access via an HAProxy virtual IP, for services such as HTTPd that
|
|
||||||
are accessed via a TCP socket that can be load balanced.
|
|
||||||
- Built-in application clustering, when available from the application.
|
|
||||||
Galera is one example of this.
|
|
||||||
- Starting up one instance of the service on several controller nodes,
|
|
||||||
when they can coexist and coordinate by other means. RPC in
|
|
||||||
``nova-conductor`` is one example of this.
|
|
||||||
- No high availability, when the service can only work in
|
|
||||||
active/passive mode.
|
|
||||||
|
|
||||||
There are known issues with cinder-volume that make it advisable to run it as
|
|
||||||
active/passive for now; see:
|
|
||||||
https://blueprints.launchpad.net/cinder/+spec/cinder-volume-active-active-support
|
|
||||||
|
|
||||||
While there will be multiple neutron LBaaS agents running, each agent
|
|
||||||
will manage a set of load balancers that cannot be failed over to
|
|
||||||
another node.
|
|
||||||
|
|
||||||
Architecture limitations
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
This architecture has some inherent limitations that should be kept in
|
|
||||||
mind during deployment and daily operations.
|
|
||||||
The following sections describe these limitations.
|
|
||||||
|
|
||||||
#. Keepalived and network partitions
|
|
||||||
|
|
||||||
In case of a network partitioning, there is a chance that two or
|
|
||||||
more nodes running keepalived claim to hold the same VIP, which may
|
|
||||||
lead to an undesired behaviour. Since keepalived uses VRRP over
|
|
||||||
multicast to elect a master (VIP owner), a network partition in
|
|
||||||
which keepalived nodes cannot communicate will result in the VIPs
|
|
||||||
existing on two nodes. When the network partition is resolved, the
|
|
||||||
duplicate VIPs should also be resolved. Note that this network
|
|
||||||
partition problem with VRRP is a known limitation for this
|
|
||||||
architecture.
|
|
||||||
|
|
||||||
#. Cinder-volume as a single point of failure
|
|
||||||
|
|
||||||
There are currently concerns over the ability of the cinder-volume service
|
|
||||||
to run as a fully active-active service. During the Mitaka
|
|
||||||
timeframe, this is being worked on, see:
|
|
||||||
https://blueprints.launchpad.net/cinder/+spec/cinder-volume-active-active-support
|
|
||||||
Thus, cinder-volume will only be running on one of the controller
|
|
||||||
nodes, even though it is configured on all nodes. In case of a
|
|
||||||
failure in the node running cinder-volume, it should be started on
|
|
||||||
a surviving controller node.
|
|
||||||
|
|
||||||
#. Neutron-lbaas-agent as a single point of failure
|
|
||||||
|
|
||||||
The current design of the neutron LBaaS agent using the HAProxy
|
|
||||||
driver does not allow high availability for the tenant load
|
|
||||||
balancers. The neutron-lbaas-agent service will be enabled and
|
|
||||||
running on all controllers, allowing for load balancers to be
|
|
||||||
distributed across all nodes. However, a controller node failure
|
|
||||||
will stop all load balancers running on that node until the service
|
|
||||||
is recovered or the load balancer is manually removed and created
|
|
||||||
again.
|
|
||||||
|
|
||||||
#. Service monitoring and recovery required
|
|
||||||
|
|
||||||
An external service monitoring infrastructure is required to check
|
|
||||||
the OpenStack service health, and notify operators in case of any
|
|
||||||
failure. This architecture does not provide any facility for that,
|
|
||||||
so it would be necessary to integrate the OpenStack deployment with
|
|
||||||
any existing monitoring environment.
|
|
||||||
|
|
||||||
#. Manual recovery after a full cluster restart
|
|
||||||
|
|
||||||
Some support services used by RDO or RHEL OSP use their own form of
|
|
||||||
application clustering. Usually, these services maintain a cluster
|
|
||||||
quorum that may be lost in case of a simultaneous restart of all
|
|
||||||
cluster nodes, for example during a power outage. Each service will
|
|
||||||
require its own procedure to regain quorum.
|
|
||||||
|
|
||||||
If you find any or all of these limitations concerning, you are
|
|
||||||
encouraged to refer to the
|
|
||||||
:doc:`Pacemaker HA architecture<intro-ha-arch-pacemaker>` instead.
|
|
@ -1,198 +0,0 @@
|
|||||||
==========================
|
|
||||||
The Pacemaker architecture
|
|
||||||
==========================
|
|
||||||
|
|
||||||
What is a cluster manager
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
At its core, a cluster is a distributed finite state machine capable
|
|
||||||
of co-ordinating the startup and recovery of inter-related services
|
|
||||||
across a set of machines.
|
|
||||||
|
|
||||||
Even a distributed and/or replicated application that is able to
|
|
||||||
survive failures on one or more machines can benefit from a
|
|
||||||
cluster manager:
|
|
||||||
|
|
||||||
#. Awareness of other applications in the stack
|
|
||||||
|
|
||||||
While SYS-V init replacements like systemd can provide
|
|
||||||
deterministic recovery of a complex stack of services, the
|
|
||||||
recovery is limited to one machine and lacks the context of what
|
|
||||||
is happening on other machines - context that is crucial to
|
|
||||||
determine the difference between a local failure, clean startup
|
|
||||||
and recovery after a total site failure.
|
|
||||||
|
|
||||||
#. Awareness of instances on other machines
|
|
||||||
|
|
||||||
Services like RabbitMQ and Galera have complicated boot-up
|
|
||||||
sequences that require co-ordination, and often serialization, of
|
|
||||||
startup operations across all machines in the cluster. This is
|
|
||||||
especially true after site-wide failure or shutdown where we must
|
|
||||||
first determine the last machine to be active.
|
|
||||||
|
|
||||||
#. A shared implementation and calculation of `quorum
|
|
||||||
<http://en.wikipedia.org/wiki/Quorum_(Distributed_Systems)>`_.
|
|
||||||
|
|
||||||
It is very important that all members of the system share the same
|
|
||||||
view of who their peers are and whether or not they are in the
|
|
||||||
majority. Failure to do this leads very quickly to an internal
|
|
||||||
`split-brain <http://en.wikipedia.org/wiki/Split-brain_(computing)>`_
|
|
||||||
state - where different parts of the system are pulling in
|
|
||||||
different and incompatible directions.
|
|
||||||
|
|
||||||
#. Data integrity through fencing (a non-responsive process does not
|
|
||||||
imply it is not doing anything)
|
|
||||||
|
|
||||||
A single application does not have sufficient context to know the
|
|
||||||
difference between failure of a machine and failure of the
|
|
||||||
application on a machine. The usual practice is to assume the
|
|
||||||
machine is dead and carry on, however this is highly risky - a
|
|
||||||
rogue process or machine could still be responding to requests and
|
|
||||||
generally causing havoc. The safer approach is to make use of
|
|
||||||
remotely accessible power switches and/or network switches and SAN
|
|
||||||
controllers to fence (isolate) the machine before continuing.
|
|
||||||
|
|
||||||
#. Automated recovery of failed instances
|
|
||||||
|
|
||||||
While the application can still run after the failure of several
|
|
||||||
instances, it may not have sufficient capacity to serve the
|
|
||||||
required volume of requests. A cluster can automatically recover
|
|
||||||
failed instances to prevent additional load induced failures.
|
|
||||||
|
|
||||||
For this reason, the use of a cluster manager like `Pacemaker
|
|
||||||
<http://clusterlabs.org>`_ is highly recommended.
|
|
||||||
|
|
||||||
Deployment flavors
|
|
||||||
~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
It is possible to deploy three different flavors of the Pacemaker
|
|
||||||
architecture. The two extremes are **Collapsed** (where every
|
|
||||||
component runs on every node) and **Segregated** (where every
|
|
||||||
component runs in its own 3+ node cluster).
|
|
||||||
|
|
||||||
Regardless of which flavor you choose, it is recommended that the
|
|
||||||
clusters contain at least three nodes so that we can take advantage of
|
|
||||||
`quorum <quorum_>`_.
|
|
||||||
|
|
||||||
Quorum becomes important when a failure causes the cluster to split in
|
|
||||||
two or more partitions. In this situation, you want the majority to
|
|
||||||
ensure the minority are truly dead (through fencing) and continue to
|
|
||||||
host resources. For a two-node cluster, no side has the majority and
|
|
||||||
you can end up in a situation where both sides fence each other, or
|
|
||||||
both sides are running the same services - leading to data corruption.
|
|
||||||
|
|
||||||
Clusters with an even number of hosts suffer from similar issues - a
|
|
||||||
single network failure could easily cause an N:N split where neither
|
|
||||||
side retains a majority. For this reason, we recommend an odd number
|
|
||||||
of cluster members when scaling up.
|
|
||||||
|
|
||||||
You can have up to 16 cluster members (this is currently limited by
|
|
||||||
the ability of corosync to scale higher). In extreme cases, 32 and
|
|
||||||
even up to 64 nodes could be possible, however, this is not well tested.
|
|
||||||
|
|
||||||
Collapsed
|
|
||||||
---------
|
|
||||||
|
|
||||||
In this configuration, there is a single cluster of 3 or more
|
|
||||||
nodes on which every component is running.
|
|
||||||
|
|
||||||
This scenario has the advantage of requiring far fewer, albeit more
|
|
||||||
powerful, machines. Additionally, being part of a single cluster
|
|
||||||
allows us to accurately model the ordering dependencies between
|
|
||||||
components.
|
|
||||||
|
|
||||||
This scenario can be visualized as below.
|
|
||||||
|
|
||||||
.. image:: /figures/Cluster-deployment-collapsed.png
|
|
||||||
:width: 100%
|
|
||||||
|
|
||||||
You would choose this option if you prefer to have fewer but more
|
|
||||||
powerful boxes.
|
|
||||||
|
|
||||||
This is the most common option and the one we document here.
|
|
||||||
|
|
||||||
Segregated
|
|
||||||
----------
|
|
||||||
|
|
||||||
In this configuration, each service runs in a dedicated cluster of
|
|
||||||
3 or more nodes.
|
|
||||||
|
|
||||||
The benefits of this approach are the physical isolation between
|
|
||||||
components and the ability to add capacity to specific components.
|
|
||||||
|
|
||||||
You would choose this option if you prefer to have more but
|
|
||||||
less powerful boxes.
|
|
||||||
|
|
||||||
This scenario can be visualized as below, where each box below
|
|
||||||
represents a cluster of three or more guests.
|
|
||||||
|
|
||||||
.. image:: /figures/Cluster-deployment-segregated.png
|
|
||||||
:width: 100%
|
|
||||||
|
|
||||||
Mixed
|
|
||||||
-----
|
|
||||||
|
|
||||||
It is also possible to follow a segregated approach for one or more
|
|
||||||
components that are expected to be a bottleneck and use a collapsed
|
|
||||||
approach for the remainder.
|
|
||||||
|
|
||||||
|
|
||||||
Proxy server
|
|
||||||
~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Almost all services in this stack benefit from being proxied.
|
|
||||||
Using a proxy server provides:
|
|
||||||
|
|
||||||
#. Load distribution
|
|
||||||
|
|
||||||
Many services can act in an active/active capacity, however, they
|
|
||||||
usually require an external mechanism for distributing requests to
|
|
||||||
one of the available instances. The proxy server can serve this
|
|
||||||
role.
|
|
||||||
|
|
||||||
#. API isolation
|
|
||||||
|
|
||||||
By sending all API access through the proxy, we can clearly
|
|
||||||
identify service interdependencies. We can also move them to
|
|
||||||
locations other than ``localhost`` to increase capacity if the
|
|
||||||
need arises.
|
|
||||||
|
|
||||||
#. Simplified process for adding/removing of nodes
|
|
||||||
|
|
||||||
Since all API access is directed to the proxy, adding or removing
|
|
||||||
nodes has no impact on the configuration of other services. This
|
|
||||||
can be very useful in upgrade scenarios where an entirely new set
|
|
||||||
of machines can be configured and tested in isolation before
|
|
||||||
telling the proxy to direct traffic there instead.
|
|
||||||
|
|
||||||
#. Enhanced failure detection
|
|
||||||
|
|
||||||
The proxy can be configured as a secondary mechanism for detecting
|
|
||||||
service failures. It can even be configured to look for nodes in
|
|
||||||
a degraded state (such as being 'too far' behind in the
|
|
||||||
replication) and take them out of circulation.
|
|
||||||
|
|
||||||
The following components are currently unable to benefit from the use
|
|
||||||
of a proxy server:
|
|
||||||
|
|
||||||
* RabbitMQ
|
|
||||||
* Memcached
|
|
||||||
* MongoDB
|
|
||||||
|
|
||||||
However, the reasons vary and are discussed under each component's
|
|
||||||
heading.
|
|
||||||
|
|
||||||
We recommend HAProxy as the load balancer, however, there are many
|
|
||||||
alternatives in the marketplace.
|
|
||||||
|
|
||||||
We use a check interval of 1 second, however, the timeouts vary by service.
|
|
||||||
|
|
||||||
Generally, we use round-robin to distribute load amongst instances of
|
|
||||||
active/active services, however, Galera uses the ``stick-table`` options
|
|
||||||
to ensure that incoming connections to the virtual IP (VIP) are
|
|
||||||
directed to only one of the available back ends.
|
|
||||||
|
|
||||||
In Galera's case, although it can run active/active, this helps avoid
|
|
||||||
lock contention and prevent deadlocks. It is used in combination with
|
|
||||||
the ``httpchk`` option, which ensures that only nodes that are in sync with their
|
|
||||||
peers are allowed to handle requests.
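A minimal sketch of such a back-end definition in :file:`haproxy.cfg` follows.
The addresses reuse the example IPs from this guide, and the ``httpchk`` check
assumes a clustercheck-style HTTP health service listening on port 9200 on
each Galera node; both are assumptions rather than requirements:

::

   listen galera_cluster
     bind 10.0.0.11:3306
     balance source
     option httpchk
     stick-table type ip size 1000
     stick on dst
     server controller1 10.0.0.12:3306 check port 9200 inter 2000 rise 2 fall 5
     server controller2 10.0.0.13:3306 backup check port 9200 inter 2000 rise 2 fall 5
     server controller3 10.0.0.14:3306 backup check port 9200 inter 2000 rise 2 fall 5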
|
|
@ -1,4 +0,0 @@
|
|||||||
|
|
||||||
==========================================
|
|
||||||
Overview of highly-available compute nodes
|
|
||||||
==========================================
|
|
@ -1,213 +0,0 @@
|
|||||||
==========================
|
|
||||||
High availability concepts
|
|
||||||
==========================
|
|
||||||
|
|
||||||
High availability systems seek to minimize two things:
|
|
||||||
|
|
||||||
**System downtime**
|
|
||||||
Occurs when a user-facing service is unavailable
|
|
||||||
beyond a specified maximum amount of time.
|
|
||||||
|
|
||||||
**Data loss**
|
|
||||||
Accidental deletion or destruction of data.
|
|
||||||
|
|
||||||
Most high availability systems guarantee protection against system downtime
|
|
||||||
and data loss only in the event of a single failure.
|
|
||||||
However, they are also expected to protect against cascading failures,
|
|
||||||
where a single failure deteriorates into a series of consequential failures.
|
|
||||||
Many service providers guarantee a :term:`Service Level Agreement (SLA)`
|
|
||||||
that includes an uptime percentage for the computing service, calculated based
|
|
||||||
on the available time and system downtime, excluding planned outage time.
|
|
||||||
|
|
||||||
Redundancy and failover
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
High availability is implemented with redundant hardware
|
|
||||||
running redundant instances of each service.
|
|
||||||
If one piece of hardware running one instance of a service fails,
|
|
||||||
the system can then failover to use another instance of a service
|
|
||||||
that is running on hardware that did not fail.
|
|
||||||
|
|
||||||
A crucial aspect of high availability
|
|
||||||
is the elimination of single points of failure (SPOFs).
|
|
||||||
A SPOF is an individual piece of equipment or software
|
|
||||||
that causes system downtime or data loss if it fails.
|
|
||||||
In order to eliminate SPOFs, check that mechanisms exist for redundancy of:
|
|
||||||
|
|
||||||
- Network components, such as switches and routers
|
|
||||||
|
|
||||||
- Applications and automatic service migration
|
|
||||||
|
|
||||||
- Storage components
|
|
||||||
|
|
||||||
- Facility services such as power, air conditioning, and fire protection
|
|
||||||
|
|
||||||
In the event that a component fails and a back-up system must take on
|
|
||||||
its load, most high availability systems will replace the failed
|
|
||||||
component as quickly as possible to maintain necessary redundancy. This
|
|
||||||
way, the time spent in a degraded protection state is minimized.
|
|
||||||
|
|
||||||
Most high availability systems fail in the event of multiple
|
|
||||||
independent (non-consequential) failures. In this case, most
|
|
||||||
implementations favor protecting data over maintaining availability.
|
|
||||||
|
|
||||||
High availability systems typically achieve an uptime percentage of
|
|
||||||
99.99% or more, which roughly equates to less than an hour of
|
|
||||||
cumulative downtime per year. In order to achieve this, high
|
|
||||||
availability systems should keep recovery times after a failure to
|
|
||||||
about one to two minutes, sometimes significantly less.
|
|
||||||
|
|
||||||
OpenStack currently meets such availability requirements for its own
|
|
||||||
infrastructure services, meaning that an uptime of 99.99% is feasible
|
|
||||||
for the OpenStack infrastructure proper. However, OpenStack does not
|
|
||||||
guarantee 99.99% availability for individual guest instances.
|
|
||||||
|
|
||||||
This document discusses some common methods of implementing highly
|
|
||||||
available systems, with an emphasis on the core OpenStack services and
|
|
||||||
other open source services that are closely aligned with OpenStack.
|
|
||||||
These methods are by no means the only ways to do it;
|
|
||||||
you may supplement these services with commercial hardware and software
|
|
||||||
that provides additional features and functionality.
|
|
||||||
You also need to address high availability concerns
|
|
||||||
for any applications software that you run on your OpenStack environment.
|
|
||||||
The important thing is to make sure that your services are redundant
|
|
||||||
and available; how you achieve that is up to you.
|
|
||||||
|
|
||||||
Stateless vs. stateful services
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Preventing single points of failure can depend on whether or not a
|
|
||||||
service is stateless.
|
|
||||||
|
|
||||||
Stateless service
|
|
||||||
A service that provides a response after your request
|
|
||||||
and then requires no further attention.
|
|
||||||
To make a stateless service highly available,
|
|
||||||
you need to provide redundant instances and load balance them.
|
|
||||||
OpenStack services that are stateless include ``nova-api``,
|
|
||||||
``nova-conductor``, ``glance-api``, ``keystone-api``,
|
|
||||||
``neutron-api`` and ``nova-scheduler``.
|
|
||||||
|
|
||||||
Stateful service
|
|
||||||
A service where subsequent requests to the service
|
|
||||||
depend on the results of the first request.
|
|
||||||
Stateful services are more difficult to manage because a single
|
|
||||||
action typically involves more than one request, so simply providing
|
|
||||||
additional instances and load balancing does not solve the problem.
|
|
||||||
For example, if the horizon user interface reset itself every time
|
|
||||||
you went to a new page, it would not be very useful.
|
|
||||||
OpenStack services that are stateful include the OpenStack database
|
|
||||||
and message queue.
|
|
||||||
Making stateful services highly available can depend on whether you choose
|
|
||||||
an active/passive or active/active configuration.
|
|
||||||
|
|
||||||
Active/Passive vs. Active/Active
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
Stateful services may be configured as active/passive or active/active:
|
|
||||||
|
|
||||||
:term:`active/passive configuration`
|
|
||||||
Maintains a redundant instance
|
|
||||||
that can be brought online when the active service fails.
|
|
||||||
For example, OpenStack writes to the main database
|
|
||||||
while maintaining a disaster recovery database that can be brought online
|
|
||||||
if the main database fails.
|
|
||||||
|
|
||||||
A typical active/passive installation for a stateful service maintains
|
|
||||||
a replacement resource that can be brought online when required.
|
|
||||||
Requests are handled using a :term:`virtual IP` address (VIP) that
|
|
||||||
facilitates returning to service with minimal reconfiguration.
|
|
||||||
A separate application (such as Pacemaker or Corosync) monitors
|
|
||||||
these services, bringing the backup online as necessary.
|
|
||||||
|
|
||||||
:term:`active/active configuration`
|
|
||||||
Each service also has a backup but manages both the main and
|
|
||||||
redundant systems concurrently.
|
|
||||||
This way, if there is a failure, the user is unlikely to notice.
|
|
||||||
The backup system is already online and takes on increased load
|
|
||||||
while the main system is fixed and brought back online.
|
|
||||||
|
|
||||||
Typically, an active/active installation for a stateless service
|
|
||||||
maintains a redundant instance, and requests are load balanced using
|
|
||||||
a virtual IP address and a load balancer such as HAProxy.
|
|
||||||
|
|
||||||
A typical active/active installation for a stateful service includes
|
|
||||||
redundant services, with all instances having an identical state. In
|
|
||||||
other words, updates to one instance of a database update all other
|
|
||||||
instances. This way a request to one instance is the same as a
|
|
||||||
request to any other. A load balancer manages the traffic to these
|
|
||||||
systems, ensuring that operational systems always handle the
|
|
||||||
request.
|
|
||||||
|
|
||||||
Clusters and quorums
|
|
||||||
~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
The quorum specifies the minimal number of nodes
|
|
||||||
that must be functional in a cluster of redundant nodes
|
|
||||||
in order for the cluster to remain functional.
|
|
||||||
When one node fails and failover transfers control to other nodes,
|
|
||||||
the system must ensure that data and processes remain sane.
|
|
||||||
To determine this, the contents of the remaining nodes are compared
|
|
||||||
and, if there are discrepancies, a "majority rules" algorithm is implemented.
|
|
||||||
|
|
||||||
For this reason, each cluster in a high availability environment should
|
|
||||||
have an odd number of nodes and the quorum is defined as more than a half
|
|
||||||
of the nodes.
|
|
||||||
If multiple nodes fail so that the cluster size falls below the quorum
|
|
||||||
value, the cluster itself fails.
|
|
||||||
|
|
||||||
For example, in a seven-node cluster, the quorum should be set to
|
|
||||||
floor(7/2) + 1 == 4. If quorum is four and four nodes fail simultaneously,
|
|
||||||
the cluster itself would fail, whereas it would continue to function if
|
|
||||||
no more than three nodes fail. If split to partitions of three and four nodes
|
|
||||||
respectively, the quorum of four nodes would continue to operate the majority
|
|
||||||
partition and stop or fence the minority one (depending on the
|
|
||||||
no-quorum-policy cluster configuration).
|
|
||||||
|
|
||||||
The quorum could also have been set to three, purely as a configuration
|
|
||||||
example.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
Note that setting the quorum to a value less than floor(n/2) + 1 is not
|
|
||||||
recommended and would likely cause a split-brain in the face of network
|
|
||||||
partitions.
|
|
||||||
|
|
||||||
Then, for the given example, when four nodes fail simultaneously,
the cluster would continue to function as well. But if split into partitions of
three and four nodes respectively, a quorum of three would make both
sides attempt to fence the other and host resources. Without fencing
enabled, the cluster would go straight to running two copies of each resource.
|
|
||||||
|
|
||||||
This is why setting the quorum to a value less than floor(n/2) + 1 is
|
|
||||||
dangerous. However, it may be required for some specific cases, such as a
temporary measure at a point when it is known with 100% certainty that the other
|
|
||||||
nodes are down.
|
|
||||||
|
|
||||||
When configuring an OpenStack environment for study or demonstration purposes,
|
|
||||||
it is possible to turn off the quorum checking;
|
|
||||||
this is discussed later in this guide.
|
|
||||||
Production systems should always run with quorum enabled.
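For reference, such a study-only relaxation is usually expressed through
Pacemaker's ``no-quorum-policy`` property. The following is a sketch using
``crmsh`` and must not be used on production clusters:

.. code-block:: console

   $ crm configure property no-quorum-policy="ignore"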
|
|
||||||
|
|
||||||
|
|
||||||
Single-controller high availability mode
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
OpenStack supports a single-controller high availability mode
|
|
||||||
that is managed by the services that manage highly available environments
|
|
||||||
but is not actually highly available because
|
|
||||||
no redundant controllers are configured to use for failover.
|
|
||||||
This environment can be used for study and demonstration
|
|
||||||
but is not appropriate for a production environment.
|
|
||||||
|
|
||||||
It is possible to add controllers to such an environment
|
|
||||||
to convert it into a truly highly available environment.
|
|
||||||
|
|
||||||
|
|
||||||
High availability is not for every user. It presents some challenges.
|
|
||||||
High availability may be too complex for databases or
|
|
||||||
systems with large amounts of data. Replication can slow large systems
|
|
||||||
down. Different setups have different prerequisites. Read the guidelines
|
|
||||||
for each setup.
|
|
||||||
|
|
||||||
High availability is turned off as the default in OpenStack setups.
|
|
@ -1,62 +0,0 @@
|
|||||||
========================================
|
|
||||||
Overview of highly-available controllers
|
|
||||||
========================================
|
|
||||||
|
|
||||||
OpenStack is a set of multiple services exposed to the end users
|
|
||||||
as HTTP(s) APIs. Additionally, for its own internal use, OpenStack
requires an SQL database server and an AMQP broker. The physical servers
on which all of these components run are often called controllers.
This modular OpenStack architecture allows you to duplicate all the
components and run them on different controllers.
By making all the components redundant, it is possible to make
|
|
||||||
OpenStack highly-available.
|
|
||||||
|
|
||||||
In general we can divide all the OpenStack components into three categories:
|
|
||||||
|
|
||||||
- OpenStack APIs: these are stateless HTTP(s) services written in Python,
|
|
||||||
easy to duplicate and mostly easy to load balance.
|
|
||||||
|
|
||||||
- The SQL relational database server provides stateful storage consumed by other
|
|
||||||
components. Supported databases are MySQL, MariaDB, and PostgreSQL.
|
|
||||||
Making the SQL database redundant is complex.
|
|
||||||
|
|
||||||
- :term:`Advanced Message Queuing Protocol (AMQP)` provides OpenStack
|
|
||||||
internal stateful communication service.
|
|
||||||
|
|
||||||
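For illustration only, placing a stateless API behind a load balancer such
as HAProxy might look like the following sketch. The virtual IP, port, and
back-end addresses are assumptions, not values defined by this guide:

.. code-block:: none

   # Sketch: balance one OpenStack API (listening on port 5000 here)
   # across three controllers behind a virtual IP.
   listen identity_api
       bind 10.0.0.10:5000
       balance source
       option tcpka
       server controller1 10.0.0.12:5000 check inter 2000 rise 2 fall 5
       server controller2 10.0.0.13:5000 check inter 2000 rise 2 fall 5
       server controller3 10.0.0.14:5000 check inter 2000 rise 2 fall 5

The stateful SQL and AMQP services need cluster-aware solutions of their
own, which simple load balancing does not provide.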
Network components
~~~~~~~~~~~~~~~~~~

[TODO Need discussion of network hardware, bonding interfaces,
intelligent Layer 2 switches, routers and Layer 3 switches.]

The configuration uses static routing without
Virtual Router Redundancy Protocol (VRRP)
or similar techniques implemented.

[TODO Need description of VIP failover inside Linux namespaces
and expected SLA.]

See [TODO link] for more information about configuring networking
for high availability.

Common deployment architectures
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

There are primarily two HA architectures in use today.

One uses a cluster manager such as Pacemaker or Veritas to coordinate
the actions of the various services across a set of machines. Since
we are focused on FOSS, we will refer to this as the Pacemaker
architecture.

The other is optimized for Active/Active services that do not require
any inter-machine coordination. In this setup, services are started by
your init system (systemd in most modern distributions) and a tool is
used to move IP addresses between the hosts. The most common package
for doing this is keepalived; a minimal sketch follows.
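The following keepalived configuration is an illustration only; the
interface name, addresses, and password are assumptions and must be
adapted to your environment:

.. code-block:: none

   # /etc/keepalived/keepalived.conf (sketch)
   # Floats the virtual IP 10.0.0.10 between two hosts.
   vrrp_instance VI_1 {
       state MASTER          # use BACKUP and a lower priority on the peer
       interface eth0
       virtual_router_id 51
       priority 101
       advert_int 1
       authentication {
           auth_type PASS
           auth_pass secretpass
       }
       virtual_ipaddress {
           10.0.0.10
       }
   }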
.. toctree::
   :maxdepth: 1

   intro-ha-arch-pacemaker.rst
   intro-ha-arch-keepalived.rst
@ -1,4 +0,0 @@

======================================
High availability for other components
======================================
@ -1,12 +0,0 @@
=====================================
Overview of high availability storage
=====================================

Making the Block Storage (cinder) API service highly available in
active/active mode involves:

* Configuring Block Storage to listen on the VIP address

* Managing the Block Storage API daemon with the Pacemaker cluster manager

* Configuring OpenStack services to use this IP address
@ -1,15 +0,0 @@

===========================================
Introduction to OpenStack high availability
===========================================


.. toctree::
   :maxdepth: 2

   intro-ha-concepts.rst
   intro-ha-controller.rst
   intro-ha-storage.rst
   intro-ha-compute.rst
   intro-ha-other.rst
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,17 +0,0 @@

.. _dhcp-agent:

======================
Run neutron DHCP agent
======================

The OpenStack Networking service has a scheduler
that lets you run multiple agents across nodes;
the DHCP agent can be natively highly available.
To configure the number of DHCP agents per network,
modify the ``dhcp_agents_per_network`` parameter
in the :file:`/etc/neutron/neutron.conf` file.
By default, this is set to 1.
To achieve high availability,
assign more than one DHCP agent per network.
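As a minimal sketch of the relevant setting (the value ``2`` is an
example, not a recommendation made elsewhere in this guide):

.. code-block:: ini

   [DEFAULT]
   # Schedule each network to two DHCP agents so that DHCP keeps
   # working if one agent or its node fails.
   dhcp_agents_per_network = 2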
@ -1,37 +0,0 @@

.. _neutron-l3:

====================
Run neutron L3 agent
====================

The neutron L3 agent is scalable, thanks to the scheduler that supports
Virtual Router Redundancy Protocol (VRRP)
to distribute virtual routers across multiple nodes.
To enable high availability for configured routers,
edit the :file:`/etc/neutron/neutron.conf` file
to set the following values (an example snippet follows the table):

.. list-table:: /etc/neutron/neutron.conf parameters for high availability
   :widths: 15 10 30
   :header-rows: 1

   * - Parameter
     - Value
     - Description
   * - l3_ha
     - True
     - All routers are highly available by default.
   * - allow_automatic_l3agent_failover
     - True
     - Set automatic L3 agent failover for routers.
   * - max_l3_agents_per_router
     - 2 or more
     - Maximum number of network nodes to use for the HA router.
   * - min_l3_agents_per_router
     - 2 or more
     - Minimum number of network nodes to use for the HA router.
       A new router can be created only if this number
       of network nodes is available.
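Putting the table together, the relevant part of
:file:`/etc/neutron/neutron.conf` might look like the following sketch
(the exact agent counts are examples):

.. code-block:: ini

   [DEFAULT]
   # Create new routers as HA (VRRP-backed) routers by default
   l3_ha = True
   # Reschedule routers away from a failed L3 agent
   allow_automatic_l3agent_failover = True
   # Spread each HA router across at least two and at most three
   # network nodes
   max_l3_agents_per_router = 3
   min_l3_agents_per_router = 2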
@ -1,17 +0,0 @@

.. _neutron-lbaas:

=======================
Run neutron LBaaS agent
=======================

Currently, no native feature is provided
to make the LBaaS agent highly available
using the default HAProxy plug-in.
A common way to make HAProxy highly available
is to use Virtual Router Redundancy Protocol (VRRP).
Unfortunately, this is not yet implemented
in the LBaaS HAProxy plug-in.

[TODO: update this section.]
@ -1,18 +0,0 @@

.. _neutron-metadata:

==========================
Run neutron metadata agent
==========================

No native feature is available
to make this service highly available.
At this time, an active/passive solution exists:
run the neutron metadata agent
in failover mode with Pacemaker.

[TODO: Update this information.
Can this service now be made HA in active/active mode
or do we need to pull in the instructions
to run this service in active/passive mode?]
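As a sketch of the active/passive approach (assuming the systemd unit is
named ``neutron-metadata-agent``, which varies by distribution), the agent
can be handed to Pacemaker as a systemd resource:

.. code-block:: console

   # pcs resource create neutron-metadata-agent systemd:neutron-metadata-agent

Pacemaker then keeps a single copy of the agent running and restarts it on
another node if the active node fails.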
@ -1,60 +0,0 @@

=======================
OpenStack network nodes
=======================

Configure networking on each node.
The
`Networking <http://docs.openstack.org/liberty/install-guide-ubuntu/environment-networking.html>`_
section of the *Install Guide* includes basic information
about configuring networking.

Notes from planning outline:

- Rather than configuring neutron here,
  we should simply mention physical network HA methods
  such as bonding and additional node/network requirements
  for L3HA and DVR for planning purposes.
- Neutron agents should be described for active/active;
  deprecate the single agent instance case.
- For Kilo and beyond, focus on L3HA and DVR.
- Link to `Networking Guide <http://docs.openstack.org/networking-guide/>`_
  for configuration details.

[TODO: Verify that the active/passive
network configuration information from
`<http://docs.openstack.org/high-availability-guide/content/s-neutron-server.html>`_
should not be included here.

`LP1328922 <https://bugs.launchpad.net/openstack-manuals/+bug/1328922>`_
and
`LP1349398 <https://bugs.launchpad.net/openstack-manuals/+bug/1349398>`_
are related.]

OpenStack network nodes contain:

- :ref:`Neutron DHCP agent<dhcp-agent>`
- Neutron L2 agent.
  Note that the L2 agent cannot be distributed and highly available.
  Instead, it must be installed on each data forwarding node
  to control the virtual network drivers
  such as Open vSwitch or Linux Bridge.
  One L2 agent runs per node and controls its virtual interfaces.
- :ref:`Neutron L3 agent<neutron-l3>`
- :ref:`Neutron metadata agent<neutron-metadata>`
- :ref:`Neutron LBaaS<neutron-lbaas>` (Load Balancing as a Service) agent

.. note::

   For Liberty, we do not have standalone network nodes in general.
   We usually run the Networking services on the controller nodes.
   In this guide, we use the term "network nodes" for convenience.

.. toctree::
   :maxdepth: 2

   networking-ha-dhcp.rst
   networking-ha-l3.rst
   networking-ha-metadata.rst
   networking-ha-lbaas.rst
@ -1,4 +0,0 @@

=====================================================
Configuring non-core components for high availability
=====================================================
@ -1,85 +0,0 @@

.. _storage-ha-backend:

================
Storage back end
================

Most of this guide concerns the control plane of high availability:
ensuring that services continue to run even if a component fails.
Ensuring that data is not lost
is the data plane component of high availability;
this is discussed here.

An OpenStack environment includes multiple data pools for the VMs:

- Ephemeral storage is allocated for an instance
  and is deleted when the instance is deleted.
  The Compute service manages ephemeral storage.
  By default, Compute stores ephemeral drives as files
  on local disks on the Compute node,
  but Ceph RBD can instead be used
  as the storage back end for ephemeral storage.

- Persistent storage exists outside all instances.
  Two types of persistent storage are provided:

  - The Block Storage service (cinder)
    can use LVM or Ceph RBD as the storage back end.
  - The Image service (glance)
    can use the Object Storage service (swift)
    or Ceph RBD as the storage back end.

For more information about configuring storage back ends for
the different storage options, see the `Administrator Guide
<http://docs.openstack.org/admin-guide/>`_.

This section discusses ways to protect against
data loss in your OpenStack environment.

RAID drives
-----------

Configuring RAID on the hard drives that implement storage
protects your data against a hard drive failure.
If, however, the node itself fails, data may be lost.
In particular, all volumes stored on an LVM node can be lost.

Ceph
----

`Ceph RBD <http://ceph.com/>`_
is an innately highly available storage back end.
It creates a storage cluster with multiple nodes
that communicate with each other
to replicate and redistribute data dynamically.
A Ceph RBD storage cluster provides
a single shared set of storage nodes
that can handle all classes of persistent and ephemeral data
-- glance, cinder, and nova --
that are required for OpenStack instances.

Ceph RBD provides object replication capabilities
by storing Block Storage volumes as Ceph RBD objects;
Ceph RBD ensures that each replica of an object
is stored on a different node.
This means that your volumes are protected against
hard drive and node failures
or even the failure of the data center itself.

When Ceph RBD is used for ephemeral volumes
as well as block and image storage, it supports
`live migration
<http://docs.openstack.org/admin-guide/compute-live-migration-usage.html>`_
of VMs with ephemeral drives;
LVM only supports live migration of volume-backed VMs.
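As a hedged sketch of what using Ceph RBD for ephemeral storage looks like
on a Compute node, the relevant :file:`nova.conf` options are along these
lines (the pool, user, and secret names are assumptions and must match
your Ceph deployment):

.. code-block:: ini

   [libvirt]
   # Store ephemeral disks as RBD objects instead of local files
   images_type = rbd
   images_rbd_pool = vms
   images_rbd_ceph_conf = /etc/ceph/ceph.conf
   # Cephx credentials used by libvirt on the Compute node
   rbd_user = nova
   rbd_secret_uuid = LIBVIRT_RBD_SECRET_UUID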
Remote backup facilities
------------------------

[TODO: Add discussion of remote backup facilities
as an alternate way to secure one's data.
Include brief mention of key third-party technologies
with links to their documentation]
@ -1,238 +0,0 @@
.. highlight:: ini
   :linenothreshold: 5

==================================
Highly available Block Storage API
==================================

Cinder provides 'block storage as a service' suitable for
performance-sensitive scenarios such as databases, expandable file
systems, or providing a server with access to raw block-level storage.

Persistent block storage can survive instance termination and can also
be moved across instances like any external storage device. Cinder
also has a volume snapshot capability for backing up volumes.

Making this Block Storage API service highly available in
active/passive mode involves:

- :ref:`ha-cinder-pacemaker`
- :ref:`ha-cinder-configure`
- :ref:`ha-cinder-services`

In theory, you can run the Block Storage service as active/active.
However, there are sufficient concerns that it is recommended to run
the volume component as active/passive only.

Jon Bernard writes:

::

  Requests are first seen by Cinder in the API service, and we have a
  fundamental problem there - a standard test-and-set race condition
  exists for many operations where the volume status is first checked
  for an expected status and then (in a different operation) updated to
  a pending status. The pending status indicates to other incoming
  requests that the volume is undergoing a current operation, however it
  is possible for two simultaneous requests to race here, which yields
  undefined results.

  Later, the manager/driver will receive the message and carry out the
  operation. At this stage there is a question of the synchronization
  techniques employed by the drivers and what guarantees they make.

  If cinder-volume processes exist as different processes, then the
  'synchronized' decorator from the lockutils package will not be
  sufficient. In this case the programmer can pass an argument to
  synchronized() 'external=True'. If external is enabled, then the
  locking will take place on a file located on the filesystem. By
  default, this file is placed in Cinder's 'state directory' in
  /var/lib/cinder so won't be visible to cinder-volume instances running
  on different machines.

  However, the location for file locking is configurable. So an
  operator could configure the state directory to reside on shared
  storage. If the shared storage in use implements unix file locking
  semantics, then this could provide the requisite synchronization
  needed for an active/active HA configuration.

  The remaining issue is that not all drivers use the synchronization
  methods, and even fewer of those use the external file locks. A
  sub-concern would be whether they use them correctly.

You can read more about these concerns on the
`Red Hat Bugzilla <https://bugzilla.redhat.com/show_bug.cgi?id=1193229>`_
and there is a
`pseudo roadmap <https://etherpad.openstack.org/p/cinder-kilo-stabilisation-work>`_
for addressing them upstream.
.. _ha-cinder-pacemaker:

Add Block Storage API resource to Pacemaker
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

On RHEL-based systems, you should create resources for cinder's
systemd agents and create constraints to enforce startup/shutdown
ordering:

.. code-block:: console

   pcs resource create openstack-cinder-api systemd:openstack-cinder-api --clone interleave=true
   pcs resource create openstack-cinder-scheduler systemd:openstack-cinder-scheduler --clone interleave=true
   pcs resource create openstack-cinder-volume systemd:openstack-cinder-volume

   pcs constraint order start openstack-cinder-api-clone then openstack-cinder-scheduler-clone
   pcs constraint colocation add openstack-cinder-scheduler-clone with openstack-cinder-api-clone
   pcs constraint order start openstack-cinder-scheduler-clone then openstack-cinder-volume
   pcs constraint colocation add openstack-cinder-volume with openstack-cinder-scheduler-clone


If the Block Storage service runs on the same nodes as the other services,
then it is advisable to also include:

.. code-block:: console

   pcs constraint order start openstack-keystone-clone then openstack-cinder-api-clone

Alternatively, instead of using systemd agents, download and
install the OCF resource agent:

.. code-block:: console

   # cd /usr/lib/ocf/resource.d/openstack
   # wget https://git.openstack.org/cgit/openstack/openstack-resource-agents/plain/ocf/cinder-api
   # chmod a+rx *

You can now add the Pacemaker configuration for the Block Storage API
resource. Connect to the Pacemaker cluster with the
:command:`crm configure` command and add the following cluster resources:

::

  primitive p_cinder-api ocf:openstack:cinder-api \
    params config="/etc/cinder/cinder.conf" \
    os_password="secretsecret" \
    os_username="admin" \
    os_tenant_name="admin" \
    keystone_get_token_url="http://10.0.0.11:5000/v2.0/tokens" \
    op monitor interval="30s" timeout="30s"

This configuration creates ``p_cinder-api``,
a resource for managing the Block Storage API service.

The :command:`crm configure` command supports batch input,
so you may copy and paste the lines above
into your live Pacemaker configuration and then make changes as required.
For example, you may enter ``edit p_ip_cinder-api``
from the :command:`crm configure` menu
and edit the resource to match your preferred virtual IP address.

Once completed, commit your configuration changes
by entering :command:`commit` from the :command:`crm configure` menu.
Pacemaker then starts the Block Storage API service
and its dependent resources on one of your nodes.
.. _ha-cinder-configure:

Configure Block Storage API service
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Edit the ``/etc/cinder/cinder.conf`` file.
On a RHEL-based system, it should look something like:

.. code-block:: ini
   :linenos:

   [DEFAULT]
   # This is the name which we should advertise ourselves as and for
   # A/P installations it should be the same everywhere
   host = cinder-cluster-1

   # Listen on the Block Storage VIP
   osapi_volume_listen = 10.0.0.11

   auth_strategy = keystone
   control_exchange = cinder

   volume_driver = cinder.volume.drivers.nfs.NfsDriver
   nfs_shares_config = /etc/cinder/nfs_exports
   nfs_sparsed_volumes = true
   nfs_mount_options = v3

   [database]
   sql_connection = mysql://cinder:CINDER_DBPASS@10.0.0.11/cinder
   max_retries = -1

   [keystone_authtoken]
   # 10.0.0.11 is the Keystone VIP
   identity_uri = http://10.0.0.11:35357/
   auth_uri = http://10.0.0.11:5000/
   admin_tenant_name = service
   admin_user = cinder
   admin_password = CINDER_PASS

   [oslo_messaging_rabbit]
   # Explicitly list the rabbit hosts as it doesn't play well with HAProxy
   rabbit_hosts = 10.0.0.12,10.0.0.13,10.0.0.14
   # As a consequence, we also need HA queues
   rabbit_ha_queues = True
   heartbeat_timeout_threshold = 60
   heartbeat_rate = 2

Replace ``CINDER_DBPASS`` with the password you chose for the Block Storage
database. Replace ``CINDER_PASS`` with the password you chose for the
``cinder`` user in the Identity service.

This example assumes that you are using NFS for the physical storage, which
will almost never be true in a production installation.

If you are using the Block Storage service OCF agent, some settings will
be filled in for you, resulting in a shorter configuration file:

.. code-block:: ini
   :linenos:

   # We have to use a MySQL connection to store data:
   sql_connection = mysql://cinder:CINDER_DBPASS@10.0.0.11/cinder
   # Alternatively, you can switch to pymysql,
   # a new Python 3 compatible library, and use
   # sql_connection = mysql+pymysql://cinder:CINDER_DBPASS@10.0.0.11/cinder
   # to be ready when everything moves to Python 3.
   # Ref: https://wiki.openstack.org/wiki/PyMySQL_evaluation

   # We bind the Block Storage API to the VIP:
   osapi_volume_listen = 10.0.0.11

   # We send notifications to highly available RabbitMQ:
   notifier_strategy = rabbit
   rabbit_host = 10.0.0.11

Replace ``CINDER_DBPASS`` with the password you chose for the Block Storage
database.
.. _ha-cinder-services:

Configure OpenStack services to use highly available Block Storage API
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Your OpenStack services must now point their
Block Storage API configuration to the highly available,
virtual cluster IP address
rather than a Block Storage API server's physical IP address
as you would for a non-HA environment.

You must create the Block Storage API endpoint with this IP.

If you are using both private and public IP addresses,
you should create two virtual IPs and define your endpoint like this:

.. code-block:: console

   $ keystone endpoint-create --region $KEYSTONE_REGION \
     --service-id $service-id \
     --publicurl 'http://PUBLIC_VIP:8776/v1/%(tenant_id)s' \
     --adminurl 'http://10.0.0.11:8776/v1/%(tenant_id)s' \
     --internalurl 'http://10.0.0.11:8776/v1/%(tenant_id)s'
@ -1,130 +0,0 @@
====================================
Highly available OpenStack Image API
====================================

The OpenStack Image service offers a service for discovering,
registering, and retrieving virtual machine images.
To make the OpenStack Image API service highly available
in active/passive mode, you must:

- :ref:`glance-api-pacemaker`
- :ref:`glance-api-configure`
- :ref:`glance-services`

This section assumes that you are familiar with the
`documentation
<http://docs.openstack.org/liberty/install-guide-ubuntu/glance.html>`_
for installing the OpenStack Image API service.
.. _glance-api-pacemaker:

Add OpenStack Image API resource to Pacemaker
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

You must first download the resource agent to your system:

.. code-block:: console

   # cd /usr/lib/ocf/resource.d/openstack
   # wget https://git.openstack.org/cgit/openstack/openstack-resource-agents/plain/ocf/glance-api
   # chmod a+rx *

You can now add the Pacemaker configuration
for the OpenStack Image API resource.
Use the :command:`crm configure` command
to connect to the Pacemaker cluster
and add the following cluster resources:

::

  primitive p_glance-api ocf:openstack:glance-api \
    params config="/etc/glance/glance-api.conf" \
    os_password="secretsecret" \
    os_username="admin" os_tenant_name="admin" \
    os_auth_url="http://10.0.0.11:5000/v2.0/" \
    op monitor interval="30s" timeout="30s"

This configuration creates ``p_glance-api``,
a resource for managing the OpenStack Image API service.

The :command:`crm configure` command supports batch input,
so you may copy and paste the above into your live Pacemaker configuration
and then make changes as required.
For example, you may enter ``edit p_ip_glance-api``
from the :command:`crm configure` menu
and edit the resource to match your preferred virtual IP address.

After completing these steps,
commit your configuration changes by entering :command:`commit`
from the :command:`crm configure` menu.
Pacemaker then starts the OpenStack Image API service
and its dependent resources on one of your nodes.
.. _glance-api-configure:

Configure OpenStack Image service API
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Edit the :file:`/etc/glance/glance-api.conf` file
to configure the OpenStack Image service:

.. code-block:: ini

   # We have to use a MySQL connection to store data:
   sql_connection=mysql://glance:password@10.0.0.11/glance
   # Alternatively, you can switch to pymysql,
   # a new Python 3 compatible library, and use
   # sql_connection=mysql+pymysql://glance:password@10.0.0.11/glance
   # to be ready when everything moves to Python 3.
   # Ref: https://wiki.openstack.org/wiki/PyMySQL_evaluation

   # We bind the OpenStack Image API to the VIP:
   bind_host = 10.0.0.11

   # Connect to the OpenStack Image registry service:
   registry_host = 10.0.0.11

   # We send notifications to highly available RabbitMQ:
   notifier_strategy = rabbit
   rabbit_host = 10.0.0.11

[TODO: need more discussion of these parameters]
.. _glance-services:

Configure OpenStack services to use highly available OpenStack Image API
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Your OpenStack services must now point
their OpenStack Image API configuration to the highly available,
virtual cluster IP address
instead of pointing to the physical IP address
of an OpenStack Image API server
as you would in a non-HA cluster.

For OpenStack Compute, for example,
if your OpenStack Image API service IP address is 10.0.0.11
(as in the configuration explained here),
you would use the following configuration in your :file:`nova.conf` file:

.. code-block:: ini

   [glance]
   ...
   api_servers = 10.0.0.11
   ...


You must also create the OpenStack Image API endpoint with this IP address.
If you are using both private and public IP addresses,
you should create two virtual IP addresses
and define your endpoint like this:

.. code-block:: console

   $ keystone endpoint-create --region $KEYSTONE_REGION \
     --service-id $service-id --publicurl 'http://PUBLIC_VIP:9292' \
     --adminurl 'http://10.0.0.11:9292' \
     --internalurl 'http://10.0.0.11:9292'
@ -1,101 +0,0 @@
.. highlight:: ini
   :linenothreshold: 5

========================================
Highly available Shared File Systems API
========================================

Making the Shared File Systems (manila) API service highly available
in active/passive mode involves:

- :ref:`ha-manila-pacemaker`
- :ref:`ha-manila-configure`
- :ref:`ha-manila-services`
.. _ha-manila-pacemaker:

Add Shared File Systems API resource to Pacemaker
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

You must first download the resource agent to your system:

.. code-block:: console

   # cd /usr/lib/ocf/resource.d/openstack
   # wget https://git.openstack.org/cgit/openstack/openstack-resource-agents/plain/ocf/manila-api
   # chmod a+rx *

You can now add the Pacemaker configuration for the Shared File Systems
API resource. Connect to the Pacemaker cluster with the
:command:`crm configure` command and add the following cluster resources:

::

  primitive p_manila-api ocf:openstack:manila-api \
    params config="/etc/manila/manila.conf" \
    os_password="secretsecret" \
    os_username="admin" \
    os_tenant_name="admin" \
    keystone_get_token_url="http://10.0.0.11:5000/v2.0/tokens" \
    op monitor interval="30s" timeout="30s"

This configuration creates ``p_manila-api``, a resource for managing the
Shared File Systems API service.

The :command:`crm configure` command supports batch input, so you may copy
and paste the lines above into your live Pacemaker configuration and then
make changes as required. For example, you may enter ``edit p_ip_manila-api``
from the :command:`crm configure` menu and edit the resource to match your
preferred virtual IP address.

Once completed, commit your configuration changes by entering :command:`commit`
from the :command:`crm configure` menu. Pacemaker then starts the
Shared File Systems API service and its dependent resources on one of your
nodes.
.. _ha-manila-configure:

Configure Shared File Systems API service
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Edit the :file:`/etc/manila/manila.conf` file:

.. code-block:: ini
   :linenos:

   # We have to use a MySQL connection to store data:
   sql_connection = mysql+pymysql://manila:password@10.0.0.11/manila?charset=utf8

   # We bind the Shared File Systems API to the VIP:
   osapi_volume_listen = 10.0.0.11

   # We send notifications to highly available RabbitMQ:
   notifier_strategy = rabbit
   rabbit_host = 10.0.0.11
.. _ha-manila-services:

Configure OpenStack services to use HA Shared File Systems API
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Your OpenStack services must now point their Shared File Systems API
configuration to the highly available, virtual cluster IP address rather than
a Shared File Systems API server's physical IP address as you would
for a non-HA environment.

You must create the Shared File Systems API endpoint with this IP.

If you are using both private and public IP addresses, you should create two
virtual IPs and define your endpoints like this:

.. code-block:: console

   $ openstack endpoint create --region RegionOne \
     sharev2 public 'http://PUBLIC_VIP:8786/v2/%(tenant_id)s'

   $ openstack endpoint create --region RegionOne \
     sharev2 internal 'http://10.0.0.11:8786/v2/%(tenant_id)s'

   $ openstack endpoint create --region RegionOne \
     sharev2 admin 'http://10.0.0.11:8786/v2/%(tenant_id)s'
@ -1,13 +0,0 @@
=========================================
Configuring Storage for high availability
=========================================

.. toctree::
   :maxdepth: 2

   storage-ha-cinder.rst
   storage-ha-glance.rst
   storage-ha-manila.rst
   storage-ha-backend.rst
@ -1,13 +0,0 @@
# This is a cross-platform list tracking distribution packages needed by tests;
# see http://docs.openstack.org/infra/bindep/ for additional information.

gettext
libxml2-dev [platform:dpkg]
libxml2-devel [platform:rpm]
libxml2-utils [platform:dpkg]
libxslt-devel [platform:rpm]
libxslt1-dev [platform:dpkg]
python-dev [platform:dpkg]
python-lxml
zlib-devel [platform:rpm]
zlib1g-dev [platform:dpkg]
@ -1,9 +0,0 @@
# The order of packages is significant, because pip processes them in the order
# of appearance. Changing the order has an impact on the overall integration
# process, which may cause wedges in the gate later.

openstack-doc-tools>=0.31

sphinx>=1.1.2,!=1.2.0,!=1.3b1,<1.3
openstackdocstheme>=1.2.3
doc8 # Apache-2.0
@ -1,6 +0,0 @@
#!/bin/bash -e

mkdir -p publish-docs

doc-tools-build-rst doc/ha-guide --build build \
    --target ha-guide
@ -1,42 +0,0 @@
#!/bin/bash -xe

#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

DOCNAME=$1

if [ -z "$DOCNAME" ] ; then
    echo "usage $0 DOCNAME"
    exit 1
fi

# We're not doing anything for this directory.
if [[ "$DOCNAME" = "common" ]] ; then
    exit 0
fi

rm -f doc/$DOCNAME/source/locale/$DOCNAME.pot
sphinx-build -b gettext doc/$DOCNAME/source/ doc/$DOCNAME/source/locale/

# common is translated as part of openstack-manuals, do not
# include the file in the combined tree.
rm doc/$DOCNAME/source/locale/common.pot

# Take care of deleting all temporary files so that git add
# doc/$DOCNAME/source/locale will only add the single pot file.
# Remove UUIDs, those are not necessary and change too often.
msgcat --sort-by-file doc/$DOCNAME/source/locale/*.pot | \
    awk '$0 !~ /^\# [a-z0-9]+$/' > doc/$DOCNAME/source/$DOCNAME.pot
rm doc/$DOCNAME/source/locale/*.pot
rm -rf doc/$DOCNAME/source/locale/.doctrees/
mv doc/$DOCNAME/source/$DOCNAME.pot doc/$DOCNAME/source/locale/$DOCNAME.pot
76
tox.ini
@ -1,76 +0,0 @@
[tox]
minversion = 1.6
envlist = checkniceness,checkbuild,checklang
skipsdist = True

[testenv]
basepython = python2
setenv =
    VIRTUAL_ENV={envdir}
deps = -r{toxinidir}/test-requirements.txt
whitelist_externals =
    bash
    cp
    mkdir
    rm
    rsync
    sed

[testenv:venv]
commands = {posargs}

[testenv:checkniceness]
commands =
    doc8 doc

[testenv:checkbuild]
commands =
    # Build and copy RST Guides
    {toxinidir}/tools/build-all-rst.sh
    # This only generates the index page
    openstack-indexpage publish-docs

[testenv:publishdocs]
# Prepare all documents (except www subdir) so that they can get
# published on docs.openstack.org with just copying publish-docs/*
# over.
commands =
    # Build and copy RST Guides
    {toxinidir}/tools/build-all-rst.sh

[testenv:checklang]
whitelist_externals =
    doc-tools-check-languages
    bash
commands =
    doc-tools-check-languages doc-tools-check-languages.conf test all
    # Check that .po and .pot files are valid:
    bash -c "find doc -type f -regex '.*\.pot?' -print0|xargs -0 -n 1 msgfmt --check-format -o /dev/null"

[testenv:buildlang]
# Run as "tox -e buildlang -- $LANG"
whitelist_externals = doc-tools-check-languages
commands = doc-tools-check-languages doc-tools-check-languages.conf test {posargs}

[testenv:publishlang]
whitelist_externals = doc-tools-check-languages
commands = doc-tools-check-languages doc-tools-check-languages.conf publish all

[testenv:generatepot-rst]
# Generate POT files for translation, needs {posargs} like:
# tox -e generatepot-rst -- user-guide
commands = {toxinidir}/tools/generatepot-rst.sh {posargs}

[testenv:docs]
commands =
    {toxinidir}/tools/build-all-rst.sh

[doc8]
# Settings for doc8:
# Ignore target directories
ignore-path = doc/*/target,doc/common
# File extensions to use
extensions = .rst,.txt
# Disable some doc8 checks:
# D000: Check RST validity (cannot handle the linenos directive)
ignore = D000