From aaa85e3fc58002ae5bbeb265ce265534318179d3 Mon Sep 17 00:00:00 2001 From: "Parsons, Cliff (cp769u)" Date: Tue, 6 Apr 2021 18:43:33 +0000 Subject: [PATCH] Refactor Ceph OSD Init Scripts - First PS This is the first of multiple updates to ceph-osd where the OSD init code will be refactored for better sustainability. This patchset makes 2 changes: 1) Removes "ceph-disk" support, as ceph-disk was removed from the ceph image since nautilus. 2) Separates the initialization code for the bluestore, filestore, and directory backend configuration options. Change-Id: I116ce9cc8d3bac870adba8b84677ec652bbb0dd4 --- ceph-osd/Chart.yaml | 2 +- ceph-osd/templates/bin/osd/_directory.sh.tpl | 2 +- .../templates/bin/osd/ceph-disk/_block.sh.tpl | 131 ------- .../bin/osd/ceph-disk/_bluestore.sh.tpl | 75 ---- .../bin/osd/ceph-disk/_common.sh.tpl | 260 -------------- .../osd/ceph-disk/_init-with-ceph-disk.sh.tpl | 231 ------------ ...it-ceph-volume-helper-block-logical.sh.tpl | 237 +++++++++++++ .../_init-ceph-volume-helper-bluestore.sh.tpl | 191 ++++++++++ .../_init-ceph-volume-helper-directory.sh.tpl | 23 ++ .../ceph-volume/_init-with-ceph-volume.sh.tpl | 335 ++---------------- ceph-osd/templates/configmap-bin.yaml | 16 +- ceph-osd/templates/daemonset-osd.yaml | 32 +- ceph-osd/values.yaml | 6 +- releasenotes/notes/ceph-osd.yaml | 1 + 14 files changed, 501 insertions(+), 1041 deletions(-) delete mode 100644 ceph-osd/templates/bin/osd/ceph-disk/_block.sh.tpl delete mode 100644 ceph-osd/templates/bin/osd/ceph-disk/_bluestore.sh.tpl delete mode 100644 ceph-osd/templates/bin/osd/ceph-disk/_common.sh.tpl delete mode 100644 ceph-osd/templates/bin/osd/ceph-disk/_init-with-ceph-disk.sh.tpl create mode 100644 ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-block-logical.sh.tpl create mode 100644 ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-bluestore.sh.tpl create mode 100644 ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-directory.sh.tpl diff 
--git a/ceph-osd/Chart.yaml b/ceph-osd/Chart.yaml index 09892a5b96..bd123071fb 100644 --- a/ceph-osd/Chart.yaml +++ b/ceph-osd/Chart.yaml @@ -15,6 +15,6 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Ceph OSD name: ceph-osd -version: 0.1.20 +version: 0.1.21 home: https://github.com/ceph/ceph ... diff --git a/ceph-osd/templates/bin/osd/_directory.sh.tpl b/ceph-osd/templates/bin/osd/_directory.sh.tpl index a926728019..e32342730d 100644 --- a/ceph-osd/templates/bin/osd/_directory.sh.tpl +++ b/ceph-osd/templates/bin/osd/_directory.sh.tpl @@ -17,7 +17,7 @@ limitations under the License. set -ex export LC_ALL=C -source /tmp/osd-common-ceph-disk.sh +source /tmp/osd-common-ceph-volume.sh : "${JOURNAL_DIR:=/var/lib/ceph/journal}" diff --git a/ceph-osd/templates/bin/osd/ceph-disk/_block.sh.tpl b/ceph-osd/templates/bin/osd/ceph-disk/_block.sh.tpl deleted file mode 100644 index af8eb03d62..0000000000 --- a/ceph-osd/templates/bin/osd/ceph-disk/_block.sh.tpl +++ /dev/null @@ -1,131 +0,0 @@ -#!/bin/bash - -{{/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/}} - -source /tmp/osd-common-ceph-disk.sh - -set -ex - -: "${OSD_SOFT_FORCE_ZAP:=1}" -: "${OSD_JOURNAL_DISK:=}" - -if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then - export OSD_DEVICE="/var/lib/ceph/osd" -else - export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION}) -fi - -if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then - export OSD_JOURNAL="/var/lib/ceph/journal" -else - export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION}) -fi - -if [[ -z "${OSD_DEVICE}" ]];then - echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb" - exit 1 -fi - -if [[ ! -b "${OSD_DEVICE}" ]]; then - echo "ERROR- The device pointed by OSD_DEVICE ${OSD_DEVICE} doesn't exist !" - exit 1 -fi - -CEPH_DISK_OPTIONS="" -CEPH_OSD_OPTIONS="" -DATA_UUID=$(blkid -o value -s PARTUUID ${OSD_DEVICE}*1) - -udev_settle - -DATA_PART=$(dev_part ${OSD_DEVICE} 1) -MOUNTED_PART=${DATA_PART} - -ceph-disk -v \ - --setuser ceph \ - --setgroup disk \ - activate ${CEPH_DISK_OPTIONS} \ - --no-start-daemon ${DATA_PART} - -OSD_ID=$(grep "${MOUNTED_PART}" /proc/mounts | awk '{print $2}' | grep -oh '[0-9]*') - -OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}" -OSD_KEYRING="${OSD_PATH}/keyring" -# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing -OSD_WEIGHT=0 -# NOTE(supamatt): add or move the OSD's CRUSH location -crush_location - -if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then - if [ -n "${OSD_JOURNAL}" ]; then - if [ -b "${OSD_JOURNAL}" ]; then - OSD_JOURNAL_DISK="$(readlink -f ${OSD_PATH}/journal)" - if [ -z "${OSD_JOURNAL_DISK}" ]; then - echo "ERROR: Unable to find journal device ${OSD_JOURNAL_DISK}" - exit 1 - else - OSD_JOURNAL="${OSD_JOURNAL_DISK}" - if [ -e "${OSD_PATH}/run_mkjournal" ]; then - ceph-osd -i ${OSD_ID} --mkjournal - rm -rf ${OSD_PATH}/run_mkjournal - fi - fi - fi - if [ "x${JOURNAL_TYPE}" == "xdirectory" ]; then - OSD_JOURNAL="${OSD_JOURNAL}/journal.${OSD_ID}" - touch ${OSD_JOURNAL} - wait_for_file "${OSD_JOURNAL}" - else - if [ ! 
-b "${OSD_JOURNAL}" ]; then - echo "ERROR: Unable to find journal device ${OSD_JOURNAL}" - exit 1 - else - chown ceph. "${OSD_JOURNAL}" - fi - fi - else - wait_for_file "${OSD_JOURNAL}" - chown ceph. "${OSD_JOURNAL}" - fi -fi - -# NOTE(supamatt): Just in case permissions do not align up, we recursively set them correctly. -if [ $(stat -c%U ${OSD_PATH}) != ceph ]; then - chown -R ceph. ${OSD_PATH}; -fi - -# NOTE(gagehugo): Writing the OSD_ID to tmp for logging -echo "${OSD_ID}" > /tmp/osd-id - -if [ "x${JOURNAL_TYPE}" == "xdirectory" ]; then - chown -R ceph. /var/lib/ceph/journal - ceph-osd \ - --cluster ceph \ - --osd-data ${OSD_PATH} \ - --osd-journal ${OSD_JOURNAL} \ - -f \ - -i ${OSD_ID} \ - --setuser ceph \ - --setgroup disk \ - --mkjournal -fi - -exec /usr/bin/ceph-osd \ - --cluster ${CLUSTER} \ - ${CEPH_OSD_OPTIONS} \ - -f \ - -i ${OSD_ID} \ - --setuser ceph \ - --setgroup disk & echo $! > /run/ceph-osd.pid -wait diff --git a/ceph-osd/templates/bin/osd/ceph-disk/_bluestore.sh.tpl b/ceph-osd/templates/bin/osd/ceph-disk/_bluestore.sh.tpl deleted file mode 100644 index dfb6c6cc3d..0000000000 --- a/ceph-osd/templates/bin/osd/ceph-disk/_bluestore.sh.tpl +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash - -{{/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/}} - -source /tmp/osd-common-ceph-disk.sh - -set -ex - -: "${OSD_SOFT_FORCE_ZAP:=1}" - -export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION}) - -if [[ -z "${OSD_DEVICE}" ]];then - echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb" - exit 1 -fi - -if [[ ! -b "${OSD_DEVICE}" ]]; then - echo "ERROR- The device pointed by OSD_DEVICE ${OSD_DEVICE} doesn't exist !" - exit 1 -fi - -CEPH_DISK_OPTIONS="" -CEPH_OSD_OPTIONS="" -DATA_UUID=$(blkid -o value -s PARTUUID ${OSD_DEVICE}*1) - -udev_settle - -DATA_PART=$(dev_part ${OSD_DEVICE} 1) -MOUNTED_PART=${DATA_PART} - -ceph-disk -v \ - --setuser ceph \ - --setgroup disk \ - activate ${CEPH_DISK_OPTIONS} \ - --no-start-daemon ${DATA_PART} - -OSD_ID=$(grep "${MOUNTED_PART}" /proc/mounts | awk '{print $2}' | grep -oh '[0-9]*') - -OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}" -OSD_KEYRING="${OSD_PATH}/keyring" -# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing -OSD_WEIGHT=0 -# NOTE(supamatt): add or move the OSD's CRUSH location -crush_location - - -# NOTE(supamatt): Just in case permissions do not align up, we recursively set them correctly. -if [ $(stat -c%U ${OSD_PATH}) != ceph ]; then - chown -R ceph. ${OSD_PATH}; -fi - -# NOTE(gagehugo): Writing the OSD_ID to tmp for logging -echo "${OSD_ID}" > /tmp/osd-id - -exec /usr/bin/ceph-osd \ - --cluster ${CLUSTER} \ - ${CEPH_OSD_OPTIONS} \ - -f \ - -i ${OSD_ID} \ - --setuser ceph \ - --setgroup disk & echo $! > /run/ceph-osd.pid -wait diff --git a/ceph-osd/templates/bin/osd/ceph-disk/_common.sh.tpl b/ceph-osd/templates/bin/osd/ceph-disk/_common.sh.tpl deleted file mode 100644 index db0275ad45..0000000000 --- a/ceph-osd/templates/bin/osd/ceph-disk/_common.sh.tpl +++ /dev/null @@ -1,260 +0,0 @@ -#!/bin/bash - -{{/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/}} - -set -ex -export PS4='+${BASH_SOURCE:+$(basename ${BASH_SOURCE}):${LINENO}:}${FUNCNAME:+${FUNCNAME}():} ' - -: "${CRUSH_LOCATION:=root=default host=${HOSTNAME}}" -: "${OSD_PATH_BASE:=/var/lib/ceph/osd/${CLUSTER}}" -: "${CEPH_CONF:="/etc/ceph/${CLUSTER}.conf"}" -: "${OSD_BOOTSTRAP_KEYRING:=/var/lib/ceph/bootstrap-osd/${CLUSTER}.keyring}" -: "${OSD_JOURNAL_UUID:=$(uuidgen)}" -: "${OSD_JOURNAL_SIZE:=$(awk '/^osd_journal_size/{print $3}' ${CEPH_CONF}.template)}" -: "${OSD_WEIGHT:=1.0}" - -eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python3 -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))') -eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python3 -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))') -eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python3 -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))') -eval CRUSH_FAILURE_DOMAIN_FROM_HOSTNAME_MAP=$(cat /etc/ceph/storage.json | jq '.failure_domain_by_hostname_map."'$HOSTNAME'"') -eval DEVICE_CLASS=$(cat /etc/ceph/storage.json | python3 -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["device_class"]))') - -if [[ $(ceph -v | egrep "octopus|nautilus|mimic|luminous" > /dev/null 2>&1; echo $?) 
-ne 0 ]]; then - echo "ERROR- need Luminous/Mimic/Nautilus/Octopus release" - exit 1 -fi - -if [ -z "${HOSTNAME}" ]; then - echo "HOSTNAME not set; This will prevent to add an OSD into the CRUSH map" - exit 1 -fi - -if [[ ! -e ${CEPH_CONF}.template ]]; then - echo "ERROR- ${CEPH_CONF}.template must exist; get it from your existing mon" - exit 1 -else - ENDPOINT=$(kubectl get endpoints ceph-mon-discovery -n ${NAMESPACE} -o json | awk -F'"' -v port=${MON_PORT} \ - -v version=v1 -v msgr_version=v2 \ - -v msgr2_port=${MON_PORT_V2} \ - '/"ip"/{print "["version":"$4":"port"/"0","msgr_version":"$4":"msgr2_port"/"0"]"}' | paste -sd',') - if [[ "${ENDPOINT}" == "" ]]; then - /bin/sh -c -e "cat ${CEPH_CONF}.template | tee ${CEPH_CONF}" || true - else - /bin/sh -c -e "cat ${CEPH_CONF}.template | sed 's#mon_host.*#mon_host = ${ENDPOINT}#g' | tee ${CEPH_CONF}" || true - fi -fi - -# Wait for a file to exist, regardless of the type -function wait_for_file { - timeout 10 bash -c "while [ ! -e ${1} ]; do echo 'Waiting for ${1} to show up' && sleep 1 ; done" -} - -function is_available { - command -v $@ &>/dev/null -} - -function ceph_cmd_retry() { - cnt=0 - until "ceph" "$@" || [ $cnt -ge 6 ]; do - sleep 10 - ((cnt++)) - done -} - -function crush_create_or_move { - local crush_location=${1} - ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \ - osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${crush_location} -} - -function crush_add_and_move { - local crush_failure_domain_type=${1} - local crush_failure_domain_name=${2} - local crush_location=$(echo "root=default ${crush_failure_domain_type}=${crush_failure_domain_name} host=${HOSTNAME}") - crush_create_or_move "${crush_location}" - local crush_failure_domain_location_check=$(ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" osd find ${OSD_ID} | grep "${crush_failure_domain_type}" | awk -F '"' '{print $4}') - if [ 
"x${crush_failure_domain_location_check}" != "x${crush_failure_domain_name}" ]; then - # NOTE(supamatt): Manually move the buckets for previously configured CRUSH configurations - # as create-or-move may not appropiately move them. - ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \ - osd crush add-bucket "${crush_failure_domain_name}" "${crush_failure_domain_type}" || true - ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \ - osd crush move "${crush_failure_domain_name}" root=default || true - ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \ - osd crush move "${HOSTNAME}" "${crush_failure_domain_type}=${crush_failure_domain_name}" || true - fi -} - -function crush_location { - set_device_class - if [ "x${CRUSH_FAILURE_DOMAIN_TYPE}" != "xhost" ]; then - if [ "x${CRUSH_FAILURE_DOMAIN_NAME}" != "xfalse" ]; then - crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "${CRUSH_FAILURE_DOMAIN_NAME}" - elif [ "x${CRUSH_FAILURE_DOMAIN_BY_HOSTNAME}" != "xfalse" ]; then - crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "$(echo ${CRUSH_FAILURE_DOMAIN_TYPE}_$(echo ${HOSTNAME} | cut -c ${CRUSH_FAILURE_DOMAIN_BY_HOSTNAME}))" - elif [ "x${CRUSH_FAILURE_DOMAIN_FROM_HOSTNAME_MAP}" != "xnull" ]; then - crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "${CRUSH_FAILURE_DOMAIN_FROM_HOSTNAME_MAP}" - else - # NOTE(supamatt): neither variables are defined then we fall back to default behavior - crush_create_or_move "${CRUSH_LOCATION}" - fi - else - crush_create_or_move "${CRUSH_LOCATION}" - fi -} - -# Calculate proper device names, given a device and partition number -function dev_part { - local osd_device=${1} - local osd_partition=${2} - - if [[ -L ${osd_device} ]]; then - # This device is a symlink. 
Work out it's actual device - local actual_device=$(readlink -f "${osd_device}") - local bn=$(basename "${osd_device}") - if [[ "${actual_device:0-1:1}" == [0-9] ]]; then - local desired_partition="${actual_device}p${osd_partition}" - else - local desired_partition="${actual_device}${osd_partition}" - fi - # Now search for a symlink in the directory of $osd_device - # that has the correct desired partition, and the longest - # shared prefix with the original symlink - local symdir=$(dirname "${osd_device}") - local link="" - local pfxlen=0 - for option in ${symdir}/*; do - [[ -e $option ]] || break - if [[ $(readlink -f "${option}") == "${desired_partition}" ]]; then - local optprefixlen=$(prefix_length "${option}" "${bn}") - if [[ ${optprefixlen} > ${pfxlen} ]]; then - link=${symdir}/${option} - pfxlen=${optprefixlen} - fi - fi - done - if [[ $pfxlen -eq 0 ]]; then - >&2 echo "Could not locate appropriate symlink for partition ${osd_partition} of ${osd_device}" - exit 1 - fi - echo "$link" - elif [[ "${osd_device:0-1:1}" == [0-9] ]]; then - echo "${osd_device}p${osd_partition}" - else - echo "${osd_device}${osd_partition}" - fi -} - -function zap_extra_partitions { - # Examine temp mount and delete any block.db and block.wal partitions - mountpoint=${1} - journal_disk="" - journal_part="" - block_db_disk="" - block_db_part="" - block_wal_disk="" - block_wal_part="" - - # Discover journal, block.db, and block.wal partitions first before deleting anything - # If the partitions are on the same disk, deleting one can affect discovery of the other(s) - if [ -L "${mountpoint}/journal" ]; then - journal_disk=$(readlink -m ${mountpoint}/journal | sed 's/[0-9]*//g') - journal_part=$(readlink -m ${mountpoint}/journal | sed 's/[^0-9]*//g') - fi - if [ -L "${mountpoint}/block.db" ]; then - block_db_disk=$(readlink -m ${mountpoint}/block.db | sed 's/[0-9]*//g') - block_db_part=$(readlink -m ${mountpoint}/block.db | sed 's/[^0-9]*//g') - fi - if [ -L "${mountpoint}/block.wal" 
]; then - block_wal_disk=$(readlink -m ${mountpoint}/block.wal | sed 's/[0-9]*//g') - block_wal_part=$(readlink -m ${mountpoint}/block.wal | sed 's/[^0-9]*//g') - fi - - # Delete any discovered journal, block.db, and block.wal partitions - if [ ! -z "${journal_disk}" ]; then - sgdisk -d ${journal_part} ${journal_disk} - /sbin/udevadm settle --timeout=600 - /usr/bin/flock -s ${journal_disk} /sbin/partprobe ${journal_disk} - /sbin/udevadm settle --timeout=600 - fi - if [ ! -z "${block_db_disk}" ]; then - sgdisk -d ${block_db_part} ${block_db_disk} - /sbin/udevadm settle --timeout=600 - /usr/bin/flock -s ${block_db_disk} /sbin/partprobe ${block_db_disk} - /sbin/udevadm settle --timeout=600 - fi - if [ ! -z "${block_wal_disk}" ]; then - sgdisk -d ${block_wal_part} ${block_wal_disk} - /sbin/udevadm settle --timeout=600 - /usr/bin/flock -s ${block_wal_disk} /sbin/partprobe ${block_wal_disk} - /sbin/udevadm settle --timeout=600 - fi -} - -function disk_zap { - # Run all the commands that ceph-disk zap uses to clear a disk - local device=${1} - wipefs --all ${device} - # Wipe the first 200MB boundary, as Bluestore redeployments will not work otherwise - dd if=/dev/zero of=${device} bs=1M count=200 - sgdisk --zap-all -- ${device} - sgdisk --clear --mbrtogpt -- ${device} -} - -function udev_settle { - partprobe "${OSD_DEVICE}" - if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then - if [ ! -z "$BLOCK_DB" ]; then - partprobe "${BLOCK_DB}" - fi - if [ ! -z "$BLOCK_WAL" ] && [ "$BLOCK_WAL" != "$BLOCK_DB" ]; then - partprobe "${BLOCK_WAL}" - fi - else - if [ "x$JOURNAL_TYPE" == "xblock-logical" ] && [ ! -z "$OSD_JOURNAL" ]; then - OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL}) - if [ ! 
-z "$OSD_JOURNAL" ]; then - local JDEV=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g') - partprobe "${JDEV}" - fi - fi - fi - # watch the udev event queue, and exit if all current events are handled - udevadm settle --timeout=600 - - # On occassion udev may not make the correct device symlinks for Ceph, just in case we make them manually - mkdir -p /dev/disk/by-partuuid - for dev in $(awk '!/rbd/{print $4}' /proc/partitions | grep "[0-9]"); do - diskdev=$(echo "${dev//[!a-z]/}") - partnum=$(echo "${dev//[!0-9]/}") - ln -s "../../${dev}" "/dev/disk/by-partuuid/$(sgdisk -i ${partnum} /dev/${diskdev} | awk '/Partition unique GUID/{print tolower($4)}')" || true - done -} - -function set_device_class { - if [ ! -z "$DEVICE_CLASS" ]; then - if [ "x$DEVICE_CLASS" != "x$(get_device_class)" ]; then - ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \ - osd crush rm-device-class "osd.${OSD_ID}" - ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \ - osd crush set-device-class "${DEVICE_CLASS}" "osd.${OSD_ID}" - fi - fi -} - -function get_device_class { - echo $(ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \ - osd crush get-device-class "osd.${OSD_ID}") -} diff --git a/ceph-osd/templates/bin/osd/ceph-disk/_init-with-ceph-disk.sh.tpl b/ceph-osd/templates/bin/osd/ceph-disk/_init-with-ceph-disk.sh.tpl deleted file mode 100644 index ea94e82a1d..0000000000 --- a/ceph-osd/templates/bin/osd/ceph-disk/_init-with-ceph-disk.sh.tpl +++ /dev/null @@ -1,231 +0,0 @@ -#!/bin/bash - -{{/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/}} - -set -ex - -source /tmp/osd-common-ceph-disk.sh - -: "${OSD_FORCE_REPAIR:=1}" -# We do not want to zap journal disk. Tracking this option seperatly. -: "${JOURNAL_FORCE_ZAP:=0}" - -if [ "x${STORAGE_TYPE%-*}" == "xbluestore" ]; then - export OSD_BLUESTORE=1 -fi - -if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then - export OSD_DEVICE="/var/lib/ceph/osd" -else - export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION}) -fi - -if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then - export OSD_JOURNAL="/var/lib/ceph/journal" -else - export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION}) -fi - -function osd_disk_prepare { - if [[ -z "${OSD_DEVICE}" ]];then - echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb" - exit 1 - fi - - if [[ ! -b "${OSD_DEVICE}" ]]; then - echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !" - exit 1 - fi - - if [ ! -e $OSD_BOOTSTRAP_KEYRING ]; then - echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'" - exit 1 - fi - timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING health || exit 1 - - # check device status first - if ! parted --script ${OSD_DEVICE} print > /dev/null 2>&1; then - if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then - echo "It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_REPAIR is enabled so we are zapping the device anyway" - disk_zap ${OSD_DEVICE} - else - echo "Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird." 
- echo "It would be too dangerous to destroy it without any notification." - echo "Please set OSD_FORCE_REPAIR to '1' if you really want to zap this disk." - exit 1 - fi - fi - - # then search for some ceph metadata on the disk - if [[ "$(parted --script ${OSD_DEVICE} print | egrep '^ 1.*ceph data')" ]]; then - if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then - if [ -b "${OSD_DEVICE}1" ]; then - local cephFSID=$(ceph-conf --lookup fsid) - if [ ! -z "${cephFSID}" ]; then - local tmpmnt=$(mktemp -d) - mount ${OSD_DEVICE}1 ${tmpmnt} - if [ "${OSD_BLUESTORE:-0}" -ne 1 ] && [ "x$JOURNAL_TYPE" != "xdirectory" ]; then - # we only care about journals for filestore. - if [ -f "${tmpmnt}/whoami" ]; then - OSD_JOURNAL_DISK=$(readlink -f "${tmpmnt}/journal") - local osd_id=$(cat "${tmpmnt}/whoami") - if [ ! -b "${OSD_JOURNAL_DISK}" ]; then - OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL}) - local jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g') - if [ ${jdev} == ${OSD_JOURNAL} ]; then - echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL}." - echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it." - rm -rf ${tmpmnt}/ceph_fsid - else - echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL_DISK}." - echo "Because OSD_FORCE_REPAIR is set and paritions are manually defined, we will" - echo "attempt to recreate the missing journal device partitions." - osd_journal_create ${OSD_JOURNAL} - ln -sf /dev/disk/by-partuuid/${OSD_JOURNAL_UUID} ${tmpmnt}/journal - echo ${OSD_JOURNAL_UUID} | tee ${tmpmnt}/journal_uuid - chown ceph. ${OSD_JOURNAL} - # During OSD start we will format the journal and set the fsid - touch ${tmpmnt}/run_mkjournal - fi - fi - else - echo "It looks like ${OSD_DEVICE} has a ceph data partition but is missing it's metadata." - echo "The device may contain inconsistent metadata or be corrupted." - echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it." 
- rm -rf ${tmpmnt}/ceph_fsid - fi - fi - if [ -f "${tmpmnt}/ceph_fsid" ]; then - osdFSID=$(cat "${tmpmnt}/ceph_fsid") - if [ ${osdFSID} != ${cephFSID} ]; then - echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster." - echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}" - echo "Because OSD_FORCE_REPAIR was set, we will zap this device." - zap_extra_partitions ${tmpmnt} - umount ${tmpmnt} - disk_zap ${OSD_DEVICE} - else - umount ${tmpmnt} - echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster." - echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped." - echo "Moving on, trying to activate the OSD now." - return - fi - else - echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID." - echo "Because OSD_FORCE_REPAIR was set, we will zap this device." - zap_extra_partitions ${tmpmnt} - umount ${tmpmnt} - disk_zap ${OSD_DEVICE} - fi - else - echo "Unable to determine the FSID of the current cluster." - echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped." - echo "Moving on, trying to activate the OSD now." - return - fi - else - echo "parted says ${OSD_DEVICE}1 should exist, but we do not see it." - echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is" - echo "Moving on, trying to activate the OSD now." - return - fi - else - echo "INFO- It looks like ${OSD_DEVICE} is an OSD, set OSD_FORCE_REPAIR=1 to use this device anyway and zap its content" - echo "You can also use the disk_zap scenario on the appropriate device to zap it" - echo "Moving on, trying to activate the OSD now." - return - fi - fi - - if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then - CLI_OPTS="${CLI_OPTS} --bluestore" - - if [ ! -z "$BLOCK_DB" ]; then - CLI_OPTS="${CLI_OPTS} --block.db ${BLOCK_DB}" - fi - - if [ ! 
-z "$BLOCK_WAL" ]; then - CLI_OPTS="${CLI_OPTS} --block.wal ${BLOCK_WAL}" - fi - - CLI_OPTS="${CLI_OPTS} ${OSD_DEVICE}" - else - # we only care about journals for filestore. - osd_journal_prepare - - CLI_OPTS="${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE}" - - if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then - CLI_OPTS="${CLI_OPTS} --journal-file" - else - CLI_OPTS="${CLI_OPTS} ${OSD_JOURNAL}" - fi - fi - - udev_settle - ceph-disk -v prepare ${CLI_OPTS} - - if [ ! -z "$DEVICE_CLASS" ]; then - local osd_id=$(cat "/var/lib/ceph/osd/*/whoami") - ceph osd crush rm-device-class osd."${osd_id}" - ceph osd crush set-device-class "${DEVICE_CLASS}" osd."${osd_id}" - fi -} - -function osd_journal_create { - local osd_journal=${1} - local osd_journal_partition=$(echo ${osd_journal} | sed 's/[^0-9]//g') - local jdev=$(echo ${osd_journal} | sed 's/[0-9]//g') - if [ -b "${jdev}" ]; then - sgdisk --new=${osd_journal_partition}:0:+${OSD_JOURNAL_SIZE}M \ - --change-name='${osd_journal_partition}:ceph journal' \ - --partition-guid=${osd_journal_partition}:${OSD_JOURNAL_UUID} \ - --typecode=${osd_journal_partition}:45b0969e-9b03-4f30-b4c6-b4b80ceff106 --mbrtogpt -- ${jdev} - OSD_JOURNAL=$(dev_part ${jdev} ${osd_journal_partition}) - udev_settle - else - echo "The backing device ${jdev} for ${OSD_JOURNAL} does not exist on this system." - exit 1 - fi -} - -function osd_journal_prepare { - if [ -n "${OSD_JOURNAL}" ]; then - if [ -b ${OSD_JOURNAL} ]; then - OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL}) - OSD_JOURNAL_PARTITION=$(echo ${OSD_JOURNAL} | sed 's/[^0-9]//g') - local jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g') - if [ -z "${OSD_JOURNAL_PARTITION}" ]; then - OSD_JOURNAL=$(dev_part ${jdev} ${OSD_JOURNAL_PARTITION}) - else - OSD_JOURNAL=${OSD_JOURNAL} - fi - elif [ "x$JOURNAL_TYPE" != "xdirectory" ]; then - # The block device exists but doesn't appear to be paritioned, we will proceed with parititioning the device. 
- OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL}) - osd_journal_create ${OSD_JOURNAL} - fi - chown ceph. ${OSD_JOURNAL} - elif [ "x$JOURNAL_TYPE" != "xdirectory" ]; then - echo "No journal device specified. OSD and journal will share ${OSD_DEVICE}" - echo "For better performance on HDD, consider moving your journal to a separate device" - fi - CLI_OPTS="${CLI_OPTS} --filestore" -} - -if ! [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then - osd_disk_prepare -fi diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-block-logical.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-block-logical.sh.tpl new file mode 100644 index 0000000000..d247fd4a42 --- /dev/null +++ b/ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-block-logical.sh.tpl @@ -0,0 +1,237 @@ +#!/bin/bash + +{{/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +set -ex + +# We do not want to zap journal disk. Tracking this option seperatly. +: "${JOURNAL_FORCE_ZAP:=0}" + +export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION}) +export OSD_BLUESTORE=0 + +if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then + export OSD_JOURNAL="/var/lib/ceph/journal" +else + export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION}) +fi + +function osd_disk_prepare { + if [[ -z "${OSD_DEVICE}" ]]; then + echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb" + exit 1 + fi + + if [[ ! -b "${OSD_DEVICE}" ]]; then + echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !" 
+ exit 1 + fi + + if [ ! -e $OSD_BOOTSTRAP_KEYRING ]; then + echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'" + exit 1 + fi + timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING health || exit 1 + + #search for some ceph metadata on the disk based on the status of the disk/lvm in filestore + CEPH_DISK_USED=0 + CEPH_LVM_PREPARE=1 + udev_settle + OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE}) + OSD_FSID=$(get_cluster_fsid_from_device ${OSD_DEVICE}) + CLUSTER_FSID=$(ceph-conf --lookup fsid) + DISK_ZAPPED=0 + + if [[ ! -z ${OSD_ID} ]]; then + DM_NUM=$(dmsetup ls | grep $(lsblk -J ${OSD_DEVICE} | jq -r '.blockdevices[].children[].name') | awk '{print $2}' | cut -d':' -f2 | cut -d')' -f1) + DM_DEV="/dev/dm-"${DM_NUM} + elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then + DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}') + CEPH_DISK_USED=1 + else + if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then + echo "It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_REPAIR is enabled so we are zapping the device anyway" + disk_zap ${OSD_DEVICE} + DISK_ZAPPED=1 + else + echo "Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird." + echo "It would be too dangerous to destroy it without any notification." + echo "Please set OSD_FORCE_REPAIR to '1' if you really want to zap this disk." + exit 1 + fi + fi + + if [ ${OSD_FORCE_REPAIR} -eq 1 ] && [ ! -z ${DM_DEV} ]; then + if [ -b $DM_DEV ]; then + local cephFSID=$(ceph-conf --lookup fsid) + if [ ! -z "${cephFSID}" ]; then + local tmpmnt=$(mktemp -d) + mount ${DM_DEV} ${tmpmnt} + if [ "x$JOURNAL_TYPE" != "xdirectory" ]; then + if [ -f "${tmpmnt}/whoami" ]; then + OSD_JOURNAL_DISK=$(readlink -f "${tmpmnt}/journal") + local osd_id=$(cat "${tmpmnt}/whoami") + if [ ! 
-b "${OSD_JOURNAL_DISK}" ]; then + OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL}) + local jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g') + if [ ${jdev} == ${OSD_JOURNAL} ]; then + echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL}." + echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it." + rm -rf ${tmpmnt}/ceph_fsid + else + echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL_DISK}." + echo "Because OSD_FORCE_REPAIR is set and paritions are manually defined, we will" + echo "attempt to recreate the missing journal device partitions." + osd_journal_create ${OSD_JOURNAL} + ln -sf /dev/disk/by-partuuid/${OSD_JOURNAL_UUID} ${tmpmnt}/journal + echo ${OSD_JOURNAL_UUID} | tee ${tmpmnt}/journal_uuid + chown ceph. ${OSD_JOURNAL} + # During OSD start we will format the journal and set the fsid + touch ${tmpmnt}/run_mkjournal + fi + fi + else + echo "It looks like ${OSD_DEVICE} has a ceph data partition but is missing it's metadata." + echo "The device may contain inconsistent metadata or be corrupted." + echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it." + rm -rf ${tmpmnt}/ceph_fsid + fi + fi + if [ -f "${tmpmnt}/ceph_fsid" ]; then + osdFSID=$(cat "${tmpmnt}/ceph_fsid") + if [ ${osdFSID} != ${cephFSID} ]; then + echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster." + echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}" + echo "Because OSD_FORCE_REPAIR was set, we will zap this device." + zap_extra_partitions ${tmpmnt} + umount ${tmpmnt} + disk_zap ${OSD_DEVICE} + else + umount ${tmpmnt} + echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster." + echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped." + echo "Moving on, trying to activate the OSD now." + fi + else + echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID." 
+ echo "Because OSD_FORCE_REPAIR was set, we will zap this device." + zap_extra_partitions ${tmpmnt} + umount ${tmpmnt} + disk_zap ${OSD_DEVICE} + fi + else + echo "Unable to determine the FSID of the current cluster." + echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped." + echo "Moving on, trying to activate the OSD now." + return + fi + else + echo "parted says ${DM_DEV} should exist, but we do not see it." + echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is" + echo "Moving on, trying to activate the OSD now." + return + fi + else + echo "INFO- It looks like ${OSD_DEVICE} is an OSD LVM" + echo "Moving on, trying to prepare and activate the OSD LVM now." + fi + + if [[ ${CEPH_DISK_USED} -eq 1 ]]; then + udev_settle + CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}" + ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}') + elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then + udev_settle + vg_name=$(get_vg_name_from_device ${OSD_DEVICE}) + if [[ "${vg_name}" ]]; then + OSD_VG=${vg_name} + else + random_uuid=$(uuidgen) + vgcreate ceph-vg-${random_uuid} ${OSD_DEVICE} + vg_name=$(get_vg_name_from_device ${OSD_DEVICE}) + vgrename ceph-vg-${random_uuid} ${vg_name} + OSD_VG=${vg_name} + fi + lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv) + if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then + lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG} + fi + OSD_LV=${OSD_VG}/${lv_name} + CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}" + CEPH_LVM_PREPARE=1 + udev_settle + fi + if [ ${CEPH_DISK_USED} -eq 0 ] ; then + if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then + CEPH_LVM_PREPARE=0 + fi + fi + + osd_journal_prepare + CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE} --journal ${OSD_JOURNAL}" + udev_settle + + if [ ! 
-z "$DEVICE_CLASS" ]; then
+    CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
+  fi
+
+  if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
+    ceph-volume lvm -v prepare ${CLI_OPTS}
+    udev_settle
+  fi
+}
+
+# Create the journal partition named by $1 (e.g. /dev/sdc1) with sgdisk.
+#
+# Arguments:
+#   $1 - path of the journal partition to create; the digits in the path are
+#        taken as the partition number and the remainder as the backing device.
+# Globals read:    OSD_JOURNAL_SIZE (used as <size>M), OSD_JOURNAL_UUID
+# Globals written: OSD_JOURNAL (set to the result of dev_part)
+# Exits with status 1 when the backing device is not a block device.
+function osd_journal_create {
+  local osd_journal=${1}
+  # NOTE(review): stripping every digit also mangles device names that contain
+  # digits themselves (e.g. nvme0n1p1) - confirm such devices are not expected.
+  local osd_journal_partition=$(echo ${osd_journal} | sed 's/[^0-9]//g')
+  local jdev=$(echo ${osd_journal} | sed 's/[0-9]//g')
+  if [ -b "${jdev}" ]; then
+    # --change-name must be double-quoted: with single quotes the shell passes
+    # the literal text '${osd_journal_partition}' to sgdisk instead of the
+    # partition number.
+    sgdisk --new=${osd_journal_partition}:0:+${OSD_JOURNAL_SIZE}M \
+      --change-name="${osd_journal_partition}:ceph journal" \
+      --partition-guid=${osd_journal_partition}:${OSD_JOURNAL_UUID} \
+      --typecode=${osd_journal_partition}:45b0969e-9b03-4f30-b4c6-b4b80ceff106 --mbrtogpt -- ${jdev}
+    OSD_JOURNAL=$(dev_part ${jdev} ${osd_journal_partition})
+    udev_settle
+  else
+    echo "The backing device ${jdev} for ${OSD_JOURNAL} does not exist on this system."
+    exit 1
+  fi
+}
+
+# Make sure the filestore journal named by OSD_JOURNAL is usable, creating the
+# journal partition when it is missing, and hand ownership to the ceph user.
+# Globals read:    OSD_JOURNAL, JOURNAL_TYPE, OSD_DEVICE
+# Globals written: OSD_JOURNAL, OSD_JOURNAL_PARTITION, CLI_OPTS
+function osd_journal_prepare {
+  if [ -n "${OSD_JOURNAL}" ]; then
+    if [ -b ${OSD_JOURNAL} ]; then
+      OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
+      OSD_JOURNAL_PARTITION=$(echo ${OSD_JOURNAL} | sed 's/[^0-9]//g')
+      local jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
+      # NOTE(review): this branch runs when NO partition number was found, yet
+      # it passes the (empty) number to dev_part; the else branch is a no-op
+      # self-assignment. Confirm whether -n was intended.
+      if [ -z "${OSD_JOURNAL_PARTITION}" ]; then
+        OSD_JOURNAL=$(dev_part ${jdev} ${OSD_JOURNAL_PARTITION})
+      else
+        OSD_JOURNAL=${OSD_JOURNAL}
+      fi
+    elif [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
+      # OSD_JOURNAL is set but is not a block device yet, so create the journal
+      # partition. osd_journal_create reassigns OSD_JOURNAL; loop until the
+      # resulting path shows up as a block device.
+      OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
+      until [ -b ${OSD_JOURNAL} ]; do
+        osd_journal_create ${OSD_JOURNAL}
+      done
+    fi
+    chown ceph. ${OSD_JOURNAL};
+  elif [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
+    echo "No journal device specified.
OSD and journal will share ${OSD_DEVICE}"
+    echo "For better performance on HDD, consider moving your journal to a separate device"
+  fi
+  CLI_OPTS="${CLI_OPTS} --filestore"
+}
diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-bluestore.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-bluestore.sh.tpl
new file mode 100644
index 0000000000..cca0cb3d42
--- /dev/null
+++ b/ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-bluestore.sh.tpl
@@ -0,0 +1,191 @@
+#!/bin/bash
+
+{{/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+set -ex
+
+# Resolve STORAGE_LOCATION to its canonical path and mark this backend as
+# bluestore for any code that checks OSD_BLUESTORE.
+export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION})
+export OSD_BLUESTORE=1
+
+# Prepare OSD_DEVICE as a bluestore OSD.
+#
+# Steps, in order:
+#   1. Validate OSD_DEVICE, the bootstrap keyring and cluster reachability.
+#   2. Inspect the device (OSD id / cluster FSID / "F800" partition) to decide
+#      whether it is already bootstrapped, foreign, or laid out with partitions.
+#   3. When OSD_FORCE_REPAIR=1 and a data partition (DM_DEV) was found, mount
+#      it and zap the disk unless its ceph_fsid matches this cluster.
+#   4. Either scan the existing partition (ceph-volume simple scan) or make
+#      sure a VG/LV exists on the device and pass it as --data.
+#   5. Prepare optional BLOCK_DB / BLOCK_WAL devices and assemble CLI_OPTS.
+#   6. Run "ceph-volume lvm prepare" when CEPH_LVM_PREPARE is still 1.
+#
+# Globals read:    OSD_DEVICE, OSD_BOOTSTRAP_KEYRING, OSD_FORCE_REPAIR, CLI_OPTS,
+#                  BLOCK_DB, BLOCK_DB_SIZE, BLOCK_WAL, BLOCK_WAL_SIZE, DEVICE_CLASS
+# Globals written: CLI_OPTS, CEPH_DISK_USED, CEPH_LVM_PREPARE, DISK_ZAPPED,
+#                  OSD_ID, OSD_FSID, CLUSTER_FSID, DM_DEV, OSD_VG, OSD_LV, ...
+# Relies on helpers that are not defined in this file (udev_settle, disk_zap,
+# zap_extra_partitions, prep_device, get_*_from_device).
+function osd_disk_prepare {
+  if [[ -z "${OSD_DEVICE}" ]]; then
+    echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
+    exit 1
+  fi
+
+  if [[ ! -b "${OSD_DEVICE}" ]]; then
+    echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !"
+    exit 1
+  fi
+
+  if [ ! -e $OSD_BOOTSTRAP_KEYRING ]; then
+    echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'"
+    exit 1
+  fi
+  # Abort early when the cluster does not answer a health query within 10 seconds.
+  timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING health || exit 1
+
+  # Look for existing ceph metadata on the device to decide how to proceed.
+  # Defaults: not partition-based (CEPH_DISK_USED=0), prepare needed (CEPH_LVM_PREPARE=1).
+  CEPH_DISK_USED=0
+  CEPH_LVM_PREPARE=1
+  udev_settle
+  OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
+  OSD_FSID=$(get_cluster_fsid_from_device ${OSD_DEVICE})
+  CLUSTER_FSID=$(ceph-conf --lookup fsid)
+  # NOTE(review): DISK_ZAPPED is never set to 1 in this function, even after
+  # disk_zap is called - confirm whether disk_zap is expected to set it.
+  DISK_ZAPPED=0
+
+  if [[ ! -z "${OSD_FSID}" ]]; then
+    if [[ "${OSD_FSID}" == "${CLUSTER_FSID}" ]]; then
+      if [[ ! -z "${OSD_ID}" ]]; then
+        # Device belongs to this cluster: skip prepare only if the cluster
+        # still lists this OSD id.
+        if ceph --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING osd ls |grep -w ${OSD_ID}; then
+          echo "Running bluestore mode and ${OSD_DEVICE} already bootstrapped"
+          CEPH_LVM_PREPARE=0
+        elif [[ $OSD_FORCE_REPAIR -eq 1 ]]; then
+          echo "OSD initialized for this cluster, but OSD ID not found in the cluster, reinitializing"
+        else
+          echo "OSD initialized for this cluster, but OSD ID not found in the cluster"
+        fi
+      fi
+    else
+      echo "OSD initialized for a different cluster, zapping it"
+      disk_zap ${OSD_DEVICE}
+      udev_settle
+    fi
+  # A partition whose sgdisk listing contains "F800" is treated as an existing
+  # partition-based OSD data partition (presumably a legacy ceph-disk layout -
+  # TODO confirm); remember its device node in DM_DEV.
+  elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then
+    DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
+    CEPH_DISK_USED=1
+  else
+    # NOTE(review): CEPH_DISK_USED is still 0 whenever this branch is reached
+    # (it is only set to 1 in the elif above), so the check below looks
+    # unreachable - confirm the intended condition.
+    if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
+      if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
+        echo "${OSD_DEVICE} isn't clean, zapping it because OSD_FORCE_REPAIR is enabled"
+        disk_zap ${OSD_DEVICE}
+      else
+        echo "${OSD_DEVICE} isn't clean, but OSD_FORCE_REPAIR isn't enabled."
+        echo "Please set OSD_FORCE_REPAIR to '1' if you want to zap this disk."
+        exit 1
+      fi
+    fi
+  fi
+
+  # Forced repair of a partition-based OSD: mount the data partition and
+  # compare its ceph_fsid with this cluster's FSID before deciding to zap.
+  if [ ${OSD_FORCE_REPAIR} -eq 1 ] && [ ! -z ${DM_DEV} ]; then
+    if [ -b $DM_DEV ]; then
+      local cephFSID=$(ceph-conf --lookup fsid)
+      if [ ! -z "${cephFSID}" ]; then
+        # NOTE(review): the temporary mount point is unmounted on every path
+        # below but the directory itself is never removed.
+        local tmpmnt=$(mktemp -d)
+        mount ${DM_DEV} ${tmpmnt}
+        if [ -f "${tmpmnt}/ceph_fsid" ]; then
+          osdFSID=$(cat "${tmpmnt}/ceph_fsid")
+          if [ ${osdFSID} != ${cephFSID} ]; then
+            echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
+            echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
+            echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
+            zap_extra_partitions ${tmpmnt}
+            umount ${tmpmnt}
+            disk_zap ${OSD_DEVICE}
+          else
+            umount ${tmpmnt}
+            echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
+            echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
+            echo "Moving on, trying to activate the OSD now."
+          fi
+        else
+          echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
+          echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
+          zap_extra_partitions ${tmpmnt}
+          umount ${tmpmnt}
+          disk_zap ${OSD_DEVICE}
+        fi
+      else
+        echo "Unable to determine the FSID of the current cluster."
+        echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped."
+        echo "Moving on, trying to activate the OSD now."
+        return
+      fi
+    else
+      echo "parted says ${DM_DEV} should exist, but we do not see it."
+      echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is"
+      echo "Moving on, trying to activate the OSD now."
+      return
+    fi
+  else
+    echo "INFO- It looks like ${OSD_DEVICE} is an OSD LVM"
+    echo "Moving on, trying to prepare and activate the OSD LVM now."
+  fi
+
+  if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
+    # Partition-based OSD: let ceph-volume scan the existing data partition.
+    udev_settle
+    CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}"
+    ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
+  elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
+    # LVM path: reuse the volume group already on the device, or create one
+    # under a temporary name and rename it to the name derived from the device.
+    udev_settle
+    vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
+    if [[ "${vg_name}" ]]; then
+      OSD_VG=${vg_name}
+    else
+      random_uuid=$(uuidgen)
+      vgcreate ceph-vg-${random_uuid} ${OSD_DEVICE}
+      vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
+      vgrename ceph-vg-${random_uuid} ${vg_name}
+      OSD_VG=${vg_name}
+    fi
+    # Create the data LV only when lvdisplay does not already list it.
+    lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv)
+    if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
+      lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG}
+    fi
+    OSD_LV=${OSD_VG}/${lv_name}
+    CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}"
+    CEPH_LVM_PREPARE=1
+    udev_settle
+  fi
+
+  if [ ${CEPH_DISK_USED} -eq 0 ]; then
+    # NOTE(review): block_db_string / block_wal_string are assigned here but
+    # not read anywhere in this function - presumably consumed by prep_device
+    # or another helper; verify.
+    if [[ ${BLOCK_DB} ]]; then
+      block_db_string=$(echo ${BLOCK_DB} | awk -F "/" '{print $2 "-" $3}')
+    fi
+    if [[ ${BLOCK_WAL} ]]; then
+      block_wal_string=$(echo ${BLOCK_WAL} | awk -F "/" '{print $2 "-" $3}')
+    fi
+    # Prepare whichever of the DB / WAL devices were configured.
+    if [[ ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
+      prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
+      prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
+    elif [[ -z ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
+      prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
+    elif [[ ${BLOCK_DB} && -z ${BLOCK_WAL} ]]; then
+      prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
+    fi
+  else
+    # Partition-based OSD whose PV already sits in a VG with "ceph" in its
+    # name: nothing left to prepare.
+    if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
+      CEPH_LVM_PREPARE=0
+    fi
+  fi
+
+  CLI_OPTS="${CLI_OPTS} --bluestore"
+
+  if [ ! -z "$BLOCK_DB" ]; then
+    CLI_OPTS="${CLI_OPTS} --block.db ${BLOCK_DB}"
+  fi
+
+  if [ ! -z "$BLOCK_WAL" ]; then
+    CLI_OPTS="${CLI_OPTS} --block.wal ${BLOCK_WAL}"
+  fi
+
+  if [ ! -z "$DEVICE_CLASS" ]; then
+    CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
+  fi
+
+  if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
+    ceph-volume lvm -v prepare ${CLI_OPTS}
+    udev_settle
+  fi
+}
diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-directory.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-directory.sh.tpl
new file mode 100644
index 0000000000..151766b438
--- /dev/null
+++ b/ceph-osd/templates/bin/osd/ceph-volume/_init-ceph-volume-helper-directory.sh.tpl
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+{{/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+set -ex
+
+# We do not want to zap the journal disk; this option is tracked separately.
+: "${JOURNAL_FORCE_ZAP:=0}"
+
+# Directory-backed OSDs use fixed paths for data and journal.
+export OSD_DEVICE="/var/lib/ceph/osd"
+export OSD_JOURNAL="/var/lib/ceph/journal"
diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl
index 2442620b57..87e67740e2 100644
--- a/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl
+++ b/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl
@@ -18,25 +18,9 @@ set -ex
 
 source /tmp/osd-common-ceph-volume.sh
 
+source /tmp/init-ceph-volume-helper-${STORAGE_TYPE}.sh
+
 : "${OSD_FORCE_REPAIR:=0}"
-# We do not want to zap journal disk. Tracking this option seperatly.
-: "${JOURNAL_FORCE_ZAP:=0}" - -if [ "x${STORAGE_TYPE%-*}" == "xbluestore" ]; then - export OSD_BLUESTORE=1 -fi - -if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then - export OSD_DEVICE="/var/lib/ceph/osd" -else - export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION}) -fi - -if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then - export OSD_JOURNAL="/var/lib/ceph/journal" -else - export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION}) -fi # Set up aliases for functions that require disk synchronization alias rename_vg='locked rename_vg' @@ -157,34 +141,6 @@ function update_lv_tags { fi } -# Settle LVM changes before inspecting volumes -udev_settle - -# Rename VGs first -if [[ "${OSD_DEVICE}" ]]; then - OSD_DEVICE=$(readlink -f ${OSD_DEVICE}) - rename_vg ${OSD_DEVICE} -fi - -if [[ "${BLOCK_DB}" ]]; then - BLOCK_DB=$(readlink -f ${BLOCK_DB}) - rename_vg ${BLOCK_DB} -fi - -if [[ "${BLOCK_WAL}" ]]; then - BLOCK_WAL=$(readlink -f ${BLOCK_WAL}) - rename_vg ${BLOCK_WAL} -fi - -# Rename LVs after VGs are correct -rename_lvs ${OSD_DEVICE} - -# Update tags (all VG and LV names should be correct before calling this) -update_lv_tags ${OSD_DEVICE} - -# Settle LVM changes again after any changes have been made -udev_settle - function prep_device { local BLOCK_DEVICE=$1 local BLOCK_DEVICE_SIZE=$2 @@ -242,281 +198,42 @@ function prep_device { udev_settle } -function osd_disk_prepare { - if [[ -z "${OSD_DEVICE}" ]]; then - echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb" - exit 1 - fi +####################################################################### +# Main program +####################################################################### - if [[ ! -b "${OSD_DEVICE}" ]]; then - echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !" - exit 1 - fi +if [[ "${STORAGE_TYPE}" != "directory" ]]; then - if [ ! -e $OSD_BOOTSTRAP_KEYRING ]; then - echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. 
You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'" - exit 1 - fi - timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING health || exit 1 - - #search for some ceph metadata on the disk based on the status of the disk/lvm in filestore - CEPH_DISK_USED=0 - CEPH_LVM_PREPARE=1 + # Settle LVM changes before inspecting volumes udev_settle - OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE}) - OSD_FSID=$(get_cluster_fsid_from_device ${OSD_DEVICE}) - CLUSTER_FSID=$(ceph-conf --lookup fsid) - DISK_ZAPPED=0 - if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then - if [[ ! -z ${OSD_ID} ]]; then - DM_NUM=$(dmsetup ls | grep $(lsblk -J ${OSD_DEVICE} | jq -r '.blockdevices[].children[].name') | awk '{print $2}' | cut -d':' -f2 | cut -d')' -f1) - DM_DEV="/dev/dm-"${DM_NUM} - elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then - DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}') - CEPH_DISK_USED=1 - else - if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then - echo "It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_REPAIR is enabled so we are zapping the device anyway" - disk_zap ${OSD_DEVICE} - DISK_ZAPPED=1 - else - echo "Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird." - echo "It would be too dangerous to destroy it without any notification." - echo "Please set OSD_FORCE_REPAIR to '1' if you really want to zap this disk." - exit 1 - fi - fi - else - if [[ ! -z "${OSD_FSID}" ]]; then - if [[ "${OSD_FSID}" == "${CLUSTER_FSID}" ]]; then - if [[ ! 
-z "${OSD_ID}" ]]; then - if ceph --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING osd ls |grep -w ${OSD_ID}; then - echo "Running bluestore mode and ${OSD_DEVICE} already bootstrapped" - CEPH_LVM_PREPARE=0 - elif [[ $OSD_FORCE_REPAIR -eq 1 ]]; then - echo "OSD initialized for this cluster, but OSD ID not found in the cluster, reinitializing" - else - echo "OSD initialized for this cluster, but OSD ID not found in the cluster" - fi - fi - else - echo "OSD initialized for a different cluster, zapping it" - disk_zap ${OSD_DEVICE} - udev_settle - fi - elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then - DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}') - CEPH_DISK_USED=1 - else - if [[ ${CEPH_DISK_USED} -eq 1 ]]; then - if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then - echo "${OSD_DEVICE} isn't clean, zapping it because OSD_FORCE_REPAIR is enabled" - disk_zap ${OSD_DEVICE} - else - echo "${OSD_DEVICE} isn't clean, but OSD_FORCE_REPAIR isn't enabled." - echo "Please set OSD_FORCE_REPAIR to '1' if you want to zap this disk." - exit 1 - fi - fi - fi - fi - if [ ${OSD_FORCE_REPAIR} -eq 1 ] && [ ! -z ${DM_DEV} ]; then - if [ -b $DM_DEV ]; then - local cephFSID=$(ceph-conf --lookup fsid) - if [ ! -z "${cephFSID}" ]; then - local tmpmnt=$(mktemp -d) - mount ${DM_DEV} ${tmpmnt} - if [ "${OSD_BLUESTORE:-0}" -ne 1 ] && [ "x$JOURNAL_TYPE" != "xdirectory" ]; then - # we only care about journals for filestore. - if [ -f "${tmpmnt}/whoami" ]; then - OSD_JOURNAL_DISK=$(readlink -f "${tmpmnt}/journal") - local osd_id=$(cat "${tmpmnt}/whoami") - if [ ! -b "${OSD_JOURNAL_DISK}" ]; then - OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL}) - local jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g') - if [ ${jdev} == ${OSD_JOURNAL} ]; then - echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL}." - echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it." 
- rm -rf ${tmpmnt}/ceph_fsid - else - echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL_DISK}." - echo "Because OSD_FORCE_REPAIR is set and paritions are manually defined, we will" - echo "attempt to recreate the missing journal device partitions." - osd_journal_create ${OSD_JOURNAL} - ln -sf /dev/disk/by-partuuid/${OSD_JOURNAL_UUID} ${tmpmnt}/journal - echo ${OSD_JOURNAL_UUID} | tee ${tmpmnt}/journal_uuid - chown ceph. ${OSD_JOURNAL} - # During OSD start we will format the journal and set the fsid - touch ${tmpmnt}/run_mkjournal - fi - fi - else - echo "It looks like ${OSD_DEVICE} has a ceph data partition but is missing it's metadata." - echo "The device may contain inconsistent metadata or be corrupted." - echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it." - rm -rf ${tmpmnt}/ceph_fsid - fi - fi - if [ -f "${tmpmnt}/ceph_fsid" ]; then - osdFSID=$(cat "${tmpmnt}/ceph_fsid") - if [ ${osdFSID} != ${cephFSID} ]; then - echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster." - echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}" - echo "Because OSD_FORCE_REPAIR was set, we will zap this device." - zap_extra_partitions ${tmpmnt} - umount ${tmpmnt} - disk_zap ${OSD_DEVICE} - else - umount ${tmpmnt} - echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster." - echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped." - echo "Moving on, trying to activate the OSD now." - fi - else - echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID." - echo "Because OSD_FORCE_REPAIR was set, we will zap this device." - zap_extra_partitions ${tmpmnt} - umount ${tmpmnt} - disk_zap ${OSD_DEVICE} - fi - else - echo "Unable to determine the FSID of the current cluster." - echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped." - echo "Moving on, trying to activate the OSD now." 
- return - fi - else - echo "parted says ${DM_DEV} should exist, but we do not see it." - echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is" - echo "Moving on, trying to activate the OSD now." - return - fi - else - echo "INFO- It looks like ${OSD_DEVICE} is an OSD LVM" - echo "Moving on, trying to prepare and activate the OSD LVM now." + # Rename VGs first + if [[ "${OSD_DEVICE}" ]]; then + OSD_DEVICE=$(readlink -f ${OSD_DEVICE}) + rename_vg ${OSD_DEVICE} fi - if [[ ${CEPH_DISK_USED} -eq 1 ]]; then - udev_settle - CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}" - ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}') - elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then - udev_settle - vg_name=$(get_vg_name_from_device ${OSD_DEVICE}) - if [[ "${vg_name}" ]]; then - OSD_VG=${vg_name} - else - random_uuid=$(uuidgen) - vgcreate ceph-vg-${random_uuid} ${OSD_DEVICE} - vg_name=$(get_vg_name_from_device ${OSD_DEVICE}) - vgrename ceph-vg-${random_uuid} ${vg_name} - OSD_VG=${vg_name} - fi - lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv) - if [[ ! 
"$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then - lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG} - fi - OSD_LV=${OSD_VG}/${lv_name} - CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}" - CEPH_LVM_PREPARE=1 - udev_settle + if [[ "${BLOCK_DB}" ]]; then + BLOCK_DB=$(readlink -f ${BLOCK_DB}) + rename_vg ${BLOCK_DB} fi - if [ "${OSD_BLUESTORE:-0}" -eq 1 ] && [ ${CEPH_DISK_USED} -eq 0 ] ; then - if [[ ${BLOCK_DB} ]]; then - block_db_string=$(echo ${BLOCK_DB} | awk -F "/" '{print $2 "-" $3}') - fi - if [[ ${BLOCK_WAL} ]]; then - block_wal_string=$(echo ${BLOCK_WAL} | awk -F "/" '{print $2 "-" $3}') - fi - if [[ ${BLOCK_DB} && ${BLOCK_WAL} ]]; then - prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}" - prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}" - elif [[ -z ${BLOCK_DB} && ${BLOCK_WAL} ]]; then - prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}" - elif [[ ${BLOCK_DB} && -z ${BLOCK_WAL} ]]; then - prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}" - fi - else - if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then - CEPH_LVM_PREPARE=0 - fi + if [[ "${BLOCK_WAL}" ]]; then + BLOCK_WAL=$(readlink -f ${BLOCK_WAL}) + rename_vg ${BLOCK_WAL} fi - if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then - CLI_OPTS="${CLI_OPTS} --bluestore" + # Rename LVs after VGs are correct + rename_lvs ${OSD_DEVICE} - if [ ! -z "$BLOCK_DB" ]; then - CLI_OPTS="${CLI_OPTS} --block.db ${BLOCK_DB}" - fi + # Update tags (all VG and LV names should be correct before calling this) + update_lv_tags ${OSD_DEVICE} - if [ ! -z "$BLOCK_WAL" ]; then - CLI_OPTS="${CLI_OPTS} --block.wal ${BLOCK_WAL}" - fi - else - # we only care about journals for filestore. - osd_journal_prepare - CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE} --journal ${OSD_JOURNAL}" - udev_settle - fi + # Settle LVM changes again after any changes have been made + udev_settle - if [ ! 
-z "$DEVICE_CLASS" ]; then - CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}" - fi - - if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then - ceph-volume lvm -v prepare ${CLI_OPTS} - udev_settle - fi -} - -function osd_journal_create { - local osd_journal=${1} - local osd_journal_partition=$(echo ${osd_journal} | sed 's/[^0-9]//g') - local jdev=$(echo ${osd_journal} | sed 's/[0-9]//g') - if [ -b "${jdev}" ]; then - sgdisk --new=${osd_journal_partition}:0:+${OSD_JOURNAL_SIZE}M \ - --change-name='${osd_journal_partition}:ceph journal' \ - --partition-guid=${osd_journal_partition}:${OSD_JOURNAL_UUID} \ - --typecode=${osd_journal_partition}:45b0969e-9b03-4f30-b4c6-b4b80ceff106 --mbrtogpt -- ${jdev} - OSD_JOURNAL=$(dev_part ${jdev} ${osd_journal_partition}) - udev_settle - else - echo "The backing device ${jdev} for ${OSD_JOURNAL} does not exist on this system." - exit 1 - fi -} - -function osd_journal_prepare { - if [ -n "${OSD_JOURNAL}" ]; then - if [ -b ${OSD_JOURNAL} ]; then - OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL}) - OSD_JOURNAL_PARTITION=$(echo ${OSD_JOURNAL} | sed 's/[^0-9]//g') - local jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g') - if [ -z "${OSD_JOURNAL_PARTITION}" ]; then - OSD_JOURNAL=$(dev_part ${jdev} ${OSD_JOURNAL_PARTITION}) - else - OSD_JOURNAL=${OSD_JOURNAL} - fi - elif [ "x$JOURNAL_TYPE" != "xdirectory" ]; then - # The block device exists but doesn't appear to be paritioned, we will proceed with parititioning the device. - OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL}) - until [ -b ${OSD_JOURNAL} ]; do - osd_journal_create ${OSD_JOURNAL} - done - fi - chown ceph. ${OSD_JOURNAL}; - elif [ "x$JOURNAL_TYPE" != "xdirectory" ]; then - echo "No journal device specified. OSD and journal will share ${OSD_DEVICE}" - echo "For better performance on HDD, consider moving your journal to a separate device" - fi - CLI_OPTS="${CLI_OPTS} --filestore" -} - -if ! 
[ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then osd_disk_prepare -fi -# Clean up resources held by the common script -common_cleanup + # Clean up resources held by the common script + common_cleanup +fi diff --git a/ceph-osd/templates/configmap-bin.yaml b/ceph-osd/templates/configmap-bin.yaml index 84fab45572..d897c625d4 100644 --- a/ceph-osd/templates/configmap-bin.yaml +++ b/ceph-osd/templates/configmap-bin.yaml @@ -34,20 +34,18 @@ data: {{ tuple "bin/osd/_start.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} log-tail.sh: | {{ tuple "bin/osd/_log-tail.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} - osd-directory-ceph-disk.sh: | + osd-directory-ceph-volume.sh: | {{ tuple "bin/osd/_directory.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} - osd-block-ceph-disk.sh: | -{{ tuple "bin/osd/ceph-disk/_block.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} - osd-bluestore-ceph-disk.sh: | -{{ tuple "bin/osd/ceph-disk/_bluestore.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} - osd-init-ceph-disk.sh: | -{{ tuple "bin/osd/ceph-disk/_init-with-ceph-disk.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} - osd-common-ceph-disk.sh: | -{{ tuple "bin/osd/ceph-disk/_common.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} osd-block-ceph-volume.sh: | {{ tuple "bin/osd/ceph-volume/_block.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} osd-bluestore-ceph-volume.sh: | {{ tuple "bin/osd/ceph-volume/_bluestore.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} + osd-init-ceph-volume-helper-bluestore.sh: | +{{ tuple "bin/osd/ceph-volume/_init-ceph-volume-helper-bluestore.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} + osd-init-ceph-volume-helper-directory.sh: | +{{ tuple "bin/osd/ceph-volume/_init-ceph-volume-helper-directory.sh.tpl" . 
| include "helm-toolkit.utils.template" | indent 4 }} + osd-init-ceph-volume-helper-block-logical.sh: | +{{ tuple "bin/osd/ceph-volume/_init-ceph-volume-helper-block-logical.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} osd-init-ceph-volume.sh: | {{ tuple "bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} osd-common-ceph-volume.sh: | diff --git a/ceph-osd/templates/daemonset-osd.yaml b/ceph-osd/templates/daemonset-osd.yaml index 03c1080d1f..23a7fa9c84 100644 --- a/ceph-osd/templates/daemonset-osd.yaml +++ b/ceph-osd/templates/daemonset-osd.yaml @@ -214,17 +214,21 @@ spec: subPath: osd-init.sh readOnly: true - name: ceph-osd-bin - mountPath: /tmp/init-ceph-disk.sh - subPath: osd-init-ceph-disk.sh + mountPath: /tmp/init-ceph-volume-helper-bluestore.sh + subPath: osd-init-ceph-volume-helper-bluestore.sh + readOnly: true + - name: ceph-osd-bin + mountPath: /tmp/init-ceph-volume-helper-directory.sh + subPath: osd-init-ceph-volume-helper-directory.sh + readOnly: true + - name: ceph-osd-bin + mountPath: /tmp/init-ceph-volume-helper-block-logical.sh + subPath: osd-init-ceph-volume-helper-block-logical.sh readOnly: true - name: ceph-osd-bin mountPath: /tmp/init-ceph-volume.sh subPath: osd-init-ceph-volume.sh readOnly: true - - name: ceph-osd-bin - mountPath: /tmp/osd-common-ceph-disk.sh - subPath: osd-common-ceph-disk.sh - readOnly: true - name: ceph-osd-bin mountPath: /tmp/osd-common-ceph-volume.sh subPath: osd-common-ceph-volume.sh @@ -358,21 +362,13 @@ spec: subPath: osd-start.sh readOnly: true - name: ceph-osd-bin - mountPath: /tmp/osd-directory-ceph-disk.sh - subPath: osd-directory-ceph-disk.sh - readOnly: true - - name: ceph-osd-bin - mountPath: /tmp/osd-block-ceph-disk.sh - subPath: osd-block-ceph-disk.sh + mountPath: /tmp/osd-directory-ceph-volume.sh + subPath: osd-directory-ceph-volume.sh readOnly: true - name: ceph-osd-bin mountPath: /tmp/osd-block-ceph-volume.sh subPath: 
osd-block-ceph-volume.sh readOnly: true - - name: ceph-osd-bin - mountPath: /tmp/osd-bluestore-ceph-disk.sh - subPath: osd-bluestore-ceph-disk.sh - readOnly: true - name: ceph-osd-bin mountPath: /tmp/osd-bluestore-ceph-volume.sh subPath: osd-bluestore-ceph-volume.sh @@ -389,10 +385,6 @@ spec: mountPath: /tmp/utils-checkDNS.sh subPath: utils-checkDNS.sh readOnly: true - - name: ceph-osd-bin - mountPath: /tmp/osd-common-ceph-disk.sh - subPath: osd-common-ceph-disk.sh - readOnly: true - name: ceph-osd-bin mountPath: /tmp/osd-common-ceph-volume.sh subPath: osd-common-ceph-volume.sh diff --git a/ceph-osd/values.yaml b/ceph-osd/values.yaml index 515e88240b..b941f94e68 100644 --- a/ceph-osd/values.yaml +++ b/ceph-osd/values.yaml @@ -41,10 +41,8 @@ labels: node_selector_key: ceph-osd node_selector_value: enabled -# We could deploy ceph cluster now with either ceph-volume or ceph-disk however -# ceph-disk is deprecated from Nautilus. -# Keeping ceph-disk as default since gate scripts are still directory backed -# osds, need to change this after moving the gates to disk backed osd. +# The default deploy tool is ceph-volume. "ceph-disk" was finally removed as it +# had been deprecated from Nautilus and was not being used. deploy: tool: "ceph-volume" # NOTE: set this to 1 if osd disk needs wiping in case of reusing from previous deployment diff --git a/releasenotes/notes/ceph-osd.yaml b/releasenotes/notes/ceph-osd.yaml index 0fb562d3e7..24bf33f690 100644 --- a/releasenotes/notes/ceph-osd.yaml +++ b/releasenotes/notes/ceph-osd.yaml @@ -21,4 +21,5 @@ ceph-osd: - 0.1.18 Uplift from Nautilus to Octopus release - 0.1.19 Update rbac api version - 0.1.20 Update directory-based OSD deployment for image changes + - 0.1.21 Refactor Ceph OSD Init Scripts - First PS ...