From 510cea0c23458142a6cb919ce993ceeaae65d8c9 Mon Sep 17 00:00:00 2001 From: Vladimir Kozhukalov Date: Thu, 16 Nov 2023 18:51:00 -0600 Subject: [PATCH] Deploy Ceph on multi-node envs - When we deploy Ceph on a multi-node env, we have to prepare the loop devices on all nodes. For this we moved the loop device setup to the deploy-env Ansible role. For simplicity we need the same device on all nodes, so we create a loop device with a big minor number (/dev/loop100 by default), assuming that only low minor numbers are likely to be busy. - For test jobs we don't need to use different devices for OSD data and metadata. There is no benefit from this for the test environment. So let's keep it simple and put both OSD data and metadata on the same device. - On a multi-node env the Ceph cluster members need to see each other, so let's use the pod network CIDR for the Ceph public and cluster networks. Change-Id: I493b6c31d97ff2fc4992c6bb1994d0c73320cd7b --- roles/deploy-env/defaults/main.yaml | 7 ++++ roles/deploy-env/files/kubeadm_config.yaml | 4 +-- roles/deploy-env/files/loop-setup.service | 18 ++++++++++ roles/deploy-env/handlers/main.yaml | 9 +++++ roles/deploy-env/tasks/control-plane.yaml | 2 +- roles/deploy-env/tasks/loopback_devices.yaml | 33 +++++++++++++++++++ roles/deploy-env/tasks/main.yaml | 5 +++ roles/osh-run-script-set/defaults/main.yaml | 5 +-- roles/osh-run-script-set/tasks/main.yaml | 3 +- roles/osh-run-script/defaults/main.yaml | 5 +-- roles/osh-run-script/tasks/main.yaml | 3 +- .../020-ceph.sh => ceph/ceph.sh} | 29 ++++++++-------- .../openstack-support-rook/020-ceph.sh | 7 +--- .../deployment/openstack-support/020-ceph.sh | 1 - zuul.d/jobs.yaml | 17 ++++++---- 15 files changed, 111 insertions(+), 37 deletions(-) create mode 100644 roles/deploy-env/files/loop-setup.service create mode 100644 roles/deploy-env/handlers/main.yaml create mode 100644 roles/deploy-env/tasks/loopback_devices.yaml rename tools/deployment/{osh-infra-logging/020-ceph.sh => ceph/ceph.sh} (90%) delete mode 120000
tools/deployment/openstack-support/020-ceph.sh diff --git a/roles/deploy-env/defaults/main.yaml b/roles/deploy-env/defaults/main.yaml index 365e32669e..4a7c95529f 100644 --- a/roles/deploy-env/defaults/main.yaml +++ b/roles/deploy-env/defaults/main.yaml @@ -13,8 +13,15 @@ kubectl: user: zuul group: zuul +kubeadm: + pod_network_cidr: "10.244.0.0/24" + service_cidr: "10.96.0.0/16" docker: root_path: /var/lib/docker containerd: root_path: /var/lib/containerd +loopback_setup: false +loopback_device: /dev/loop100 +loopback_image: /var/lib/openstack-helm/ceph-loop.img +loopback_image_size: 12G ... diff --git a/roles/deploy-env/files/kubeadm_config.yaml b/roles/deploy-env/files/kubeadm_config.yaml index 25b1adcf22..147b0c6ef4 100644 --- a/roles/deploy-env/files/kubeadm_config.yaml +++ b/roles/deploy-env/files/kubeadm_config.yaml @@ -7,7 +7,7 @@ mode: ipvs apiVersion: kubeadm.k8s.io/v1beta2 kind: ClusterConfiguration networking: - serviceSubnet: "10.96.0.0/16" - podSubnet: "10.244.0.0/24" # --pod-network-cidr + serviceSubnet: "{{ kubeadm.service_cidr }}" # --service-cidr + podSubnet: "{{ kubeadm.pod_network_cidr }}" # --pod-network-cidr dnsDomain: "cluster.local" ... 
diff --git a/roles/deploy-env/files/loop-setup.service b/roles/deploy-env/files/loop-setup.service new file mode 100644 index 0000000000..d4d6e3f09e --- /dev/null +++ b/roles/deploy-env/files/loop-setup.service @@ -0,0 +1,18 @@ +[Unit] +Description=Setup loop devices +DefaultDependencies=no +Conflicts=umount.target +Before=local-fs.target +After=systemd-udevd.service +Requires=systemd-udevd.service + +[Service] +Type=oneshot +ExecStart=/sbin/losetup {{ loopback_device }} '{{ loopback_image }}' +ExecStop=/sbin/losetup -d {{ loopback_device }} +TimeoutSec=60 +RemainAfterExit=yes + +[Install] +WantedBy=local-fs.target +Also=systemd-udevd.service diff --git a/roles/deploy-env/handlers/main.yaml b/roles/deploy-env/handlers/main.yaml new file mode 100644 index 0000000000..e9846b0ee5 --- /dev/null +++ b/roles/deploy-env/handlers/main.yaml @@ -0,0 +1,9 @@ +--- +- name: Systemd reload + shell: systemctl daemon-reload + +- name: Restart loop-setup + service: + name: loop-setup + state: restarted +... 
diff --git a/roles/deploy-env/tasks/control-plane.yaml b/roles/deploy-env/tasks/control-plane.yaml index 8c2f9997c9..e9d7422ca2 100644 --- a/roles/deploy-env/tasks/control-plane.yaml +++ b/roles/deploy-env/tasks/control-plane.yaml @@ -8,7 +8,7 @@ state: mounted - name: Prepare kubeadm config - copy: + template: src: files/kubeadm_config.yaml dest: /tmp/kubeadm_config.yaml diff --git a/roles/deploy-env/tasks/loopback_devices.yaml b/roles/deploy-env/tasks/loopback_devices.yaml new file mode 100644 index 0000000000..54cbff6e5b --- /dev/null +++ b/roles/deploy-env/tasks/loopback_devices.yaml @@ -0,0 +1,33 @@ +--- +- name: Create loop device image + shell: | + mkdir -p {{ loopback_image | dirname }} + truncate -s {{ loopback_image_size }} {{ loopback_image }} + +- name: Create loop device + shell: | + mknod {{ loopback_device }} b $(grep loop /proc/devices | cut -c3) {{ loopback_device | regex_search('[0-9]+') }} + +- name: Create loop-setup systemd unit + template: + src: files/loop-setup.service + dest: /etc/systemd/system/loop-setup.service + notify: + - Systemd reload + +- name: Systemd reload + shell: systemctl daemon-reload + +- name: Configure loop-setup systemd unit + service: + name: loop-setup + enabled: yes + state: started + notify: + - Systemd reload + - Restart loop-setup + +- name: Check {{ loopback_device }} is attached + shell: | + losetup | grep -i {{ loopback_device }} +... diff --git a/roles/deploy-env/tasks/main.yaml b/roles/deploy-env/tasks/main.yaml index e6a4d0d289..003335a38d 100644 --- a/roles/deploy-env/tasks/main.yaml +++ b/roles/deploy-env/tasks/main.yaml @@ -41,4 +41,9 @@ path: /etc/resolv.conf state: present insertbefore: "BOF" + +- name: Loop devices + include_tasks: + file: loopback_devices.yaml + when: loopback_setup ... 
diff --git a/roles/osh-run-script-set/defaults/main.yaml b/roles/osh-run-script-set/defaults/main.yaml index 20896a4677..6f555bb1a9 100644 --- a/roles/osh-run-script-set/defaults/main.yaml +++ b/roles/osh-run-script-set/defaults/main.yaml @@ -11,8 +11,9 @@ # limitations under the License. --- -ceph: - loopback_path: "/var/lib/openstack-helm" +ceph_osd_data_device: "/dev/loop0" +kubeadm: + pod_network_cidr: "10.244.0.0/24" osh_params: container_distro_name: ubuntu container_distro_version: focal diff --git a/roles/osh-run-script-set/tasks/main.yaml b/roles/osh-run-script-set/tasks/main.yaml index 6ae8c6e2b2..3bddbb92ca 100644 --- a/roles/osh-run-script-set/tasks/main.yaml +++ b/roles/osh-run-script-set/tasks/main.yaml @@ -23,7 +23,8 @@ args: chdir: "{{ zuul.project.src_dir }}/{{ gate_scripts_relative_path }}" environment: - CEPH_LOOPBACK_PATH: "{{ ceph.loopback_path }}" + CEPH_OSD_DATA_DEVICE: "{{ ceph_osd_data_device }}" + POD_NETWORK_CIDR: "{{ kubeadm.pod_network_cidr }}" zuul_site_mirror_fqdn: "{{ zuul_site_mirror_fqdn }}" OSH_EXTRA_HELM_ARGS: "{{ zuul_osh_extra_helm_args_relative_path | default('') }}" OSH_PATH: "{{ zuul_osh_relative_path | default('../openstack-helm/') }}" diff --git a/roles/osh-run-script/defaults/main.yaml b/roles/osh-run-script/defaults/main.yaml index 20896a4677..6f555bb1a9 100644 --- a/roles/osh-run-script/defaults/main.yaml +++ b/roles/osh-run-script/defaults/main.yaml @@ -11,8 +11,9 @@ # limitations under the License. 
--- -ceph: - loopback_path: "/var/lib/openstack-helm" +ceph_osd_data_device: "/dev/loop0" +kubeadm: + pod_network_cidr: "10.244.0.0/24" osh_params: container_distro_name: ubuntu container_distro_version: focal diff --git a/roles/osh-run-script/tasks/main.yaml b/roles/osh-run-script/tasks/main.yaml index 8789c7a073..844f6b3591 100644 --- a/roles/osh-run-script/tasks/main.yaml +++ b/roles/osh-run-script/tasks/main.yaml @@ -20,7 +20,8 @@ args: chdir: "{{ zuul.project.src_dir }}/{{ gate_scripts_relative_path }}" environment: - CEPH_LOOPBACK_PATH: "{{ ceph.loopback_path }}" + CEPH_OSD_DATA_DEVICE: "{{ ceph_osd_data_device }}" + POD_NETWORK_CIDR: "{{ kubeadm.pod_network_cidr }}" zuul_site_mirror_fqdn: "{{ zuul_site_mirror_fqdn }}" OSH_EXTRA_HELM_ARGS: "{{ zuul_osh_extra_helm_args_relative_path | default('') }}" OSH_PATH: "{{ zuul_osh_relative_path | default('../openstack-helm/') }}" diff --git a/tools/deployment/osh-infra-logging/020-ceph.sh b/tools/deployment/ceph/ceph.sh similarity index 90% rename from tools/deployment/osh-infra-logging/020-ceph.sh rename to tools/deployment/ceph/ceph.sh index 188625436f..ba6f5cd67a 100755 --- a/tools/deployment/osh-infra-logging/020-ceph.sh +++ b/tools/deployment/ceph/ceph.sh @@ -14,17 +14,16 @@ set -xe -# setup loopback devices for ceph -free_loop_devices=( $(ls -1 /dev/loop[0-7] | while read loopdev; do losetup | grep -q $loopdev || echo $loopdev; done) ) -./tools/deployment/common/setup-ceph-loopback-device.sh \ - --ceph-osd-data ${CEPH_OSD_DATA_DEVICE:=${free_loop_devices[0]}} \ - --ceph-osd-dbwal ${CEPH_OSD_DB_WAL_DEVICE:=${free_loop_devices[1]}} +: ${CEPH_OSD_DATA_DEVICE:="/dev/loop100"} +: ${POD_NETWORK_CIDR:="10.244.0.0/24"} #NOTE: Lint and package chart for CHART in ceph-mon ceph-osd ceph-client ceph-provisioners; do make "${CHART}" done +NUMBER_OF_OSDS="$(kubectl get nodes -l ceph-osd=enabled --no-headers | wc -l)" + #NOTE: Deploy command : ${OSH_EXTRA_HELM_ARGS:=""} [ -s /tmp/ceph-fs-uuid.txt ] || uuidgen > 
/tmp/ceph-fs-uuid.txt @@ -54,8 +53,8 @@ endpoints: metrics: default: 9283 network: - public: 172.17.0.1/16 - cluster: 172.17.0.1/16 + public: "${POD_NETWORK_CIDR}" + cluster: "${POD_NETWORK_CIDR}" port: mon: 6789 rgw: 8088 @@ -83,8 +82,8 @@ conf: crush: tunables: ${CRUSH_TUNABLES} target: - osd: 1 - final_osd: 1 + osd: ${NUMBER_OF_OSDS} + final_osd: ${NUMBER_OF_OSDS} pg_per_osd: 100 default: crush_rule: same_host @@ -174,12 +173,12 @@ conf: - data: type: bluestore location: ${CEPH_OSD_DATA_DEVICE} - block_db: - location: ${CEPH_OSD_DB_WAL_DEVICE} - size: "5GB" - block_wal: - location: ${CEPH_OSD_DB_WAL_DEVICE} - size: "2GB" + # block_db: + # location: ${CEPH_OSD_DB_WAL_DEVICE} + # size: "5GB" + # block_wal: + # location: ${CEPH_OSD_DB_WAL_DEVICE} + # size: "2GB" pod: replicas: diff --git a/tools/deployment/openstack-support-rook/020-ceph.sh b/tools/deployment/openstack-support-rook/020-ceph.sh index 503088c940..bae24d9491 100755 --- a/tools/deployment/openstack-support-rook/020-ceph.sh +++ b/tools/deployment/openstack-support-rook/020-ceph.sh @@ -17,11 +17,7 @@ set -xe # Specify the Rook release tag to use for the Rook operator here ROOK_RELEASE=v1.12.4 -# setup loopback devices for ceph -free_loop_devices=( $(ls -1 /dev/loop[0-7] | while read loopdev; do losetup | grep -q $loopdev || echo $loopdev; done) ) -./tools/deployment/common/setup-ceph-loopback-device.sh \ - --ceph-osd-data ${CEPH_OSD_DATA_DEVICE:=${free_loop_devices[0]}} \ - --ceph-osd-dbwal ${CEPH_OSD_DB_WAL_DEVICE:=${free_loop_devices[1]}} +: ${CEPH_OSD_DATA_DEVICE:="/dev/loop100"} #NOTE: Deploy command : ${OSH_EXTRA_HELM_ARGS:=""} @@ -499,7 +495,6 @@ cephClusterSpec: devices: - name: "${CEPH_OSD_DATA_DEVICE}" config: - metadataDevice: "${CEPH_OSD_DB_WAL_DEVICE}" databaseSizeMB: "5120" walSizeMB: "2048" disruptionManagement: diff --git a/tools/deployment/openstack-support/020-ceph.sh b/tools/deployment/openstack-support/020-ceph.sh deleted file mode 120000 index 1ab828eed6..0000000000 --- 
a/tools/deployment/openstack-support/020-ceph.sh +++ /dev/null @@ -1 +0,0 @@ -../osh-infra-logging/020-ceph.sh \ No newline at end of file diff --git a/zuul.d/jobs.yaml b/zuul.d/jobs.yaml index fadf0c4a21..ebae4df066 100644 --- a/zuul.d/jobs.yaml +++ b/zuul.d/jobs.yaml @@ -92,8 +92,13 @@ root_path: "/opt/ext_vol/docker" containerd: root_path: "/opt/ext_vol/containerd" - ceph: - loopback_path: "/opt/ext_vol/openstack-helm" + kubeadm: + pod_network_cidr: "10.244.0.0/24" + service_cidr: "10.96.0.0/16" + loopback_setup: true + loopback_device: /dev/loop100 + loopback_image: "/opt/ext_vol/openstack-helm/ceph-loop.img" + ceph_osd_data_device: /dev/loop100 # the k8s package versions are available here # https://packages.cloud.google.com/apt/dists/kubernetes-xenial/main/binary-amd64/Packages kube_version: "1.26.3-00" @@ -108,7 +113,7 @@ - job: name: openstack-helm-infra-logging parent: openstack-helm-infra-deploy - nodeset: openstack-helm-1node-ubuntu_focal + nodeset: openstack-helm-3nodes-ubuntu_focal vars: osh_params: openstack_release: "2023.1" @@ -117,7 +122,7 @@ gate_scripts: - ./tools/deployment/osh-infra-logging/000-prepare-k8s.sh - ./tools/deployment/osh-infra-logging/010-ingress.sh - - ./tools/deployment/osh-infra-logging/020-ceph.sh + - ./tools/deployment/ceph/ceph.sh - ./tools/deployment/osh-infra-logging/025-ceph-ns-activate.sh - ./tools/deployment/osh-infra-logging/030-radosgw-osh-infra.sh - ./tools/deployment/osh-infra-logging/040-ldap.sh @@ -194,7 +199,7 @@ - ./tools/deployment/openstack-support/000-prepare-k8s.sh - ./tools/deployment/openstack-support/007-namespace-config.sh - ./tools/deployment/openstack-support/010-ingress.sh - - ./tools/deployment/openstack-support/020-ceph.sh + - ./tools/deployment/ceph/ceph.sh - ./tools/deployment/openstack-support/025-ceph-ns-activate.sh - ./tools/deployment/openstack-support/030-rabbitmq.sh - ./tools/deployment/openstack-support/070-mariadb.sh @@ -250,7 +255,7 @@ - 
./tools/deployment/openstack-support/000-prepare-k8s.sh - ./tools/deployment/openstack-support/007-namespace-config.sh - ./tools/deployment/openstack-support/010-ingress.sh - - ./tools/deployment/openstack-support/020-ceph.sh + - ./tools/deployment/ceph/ceph.sh - ./tools/deployment/openstack-support/025-ceph-ns-activate.sh - ./tools/deployment/openstack-support/030-rabbitmq.sh - ./tools/deployment/openstack-support/070-mariadb.sh