[Posted]: 2022-11-30 23:05:10
[Problem description]:
I created a cluster with Ceph 16.2.7 and added OSDs while expanding the cluster. I then removed some OSDs from the CRUSH map with the steps below. The --all-available-devices option is also set to unmanaged.
#ceph orch apply osd --all-available-devices --unmanaged=true
#ceph osd out osd.x
#ceph osd down osd.x
#systemctl stop ceph-d813d6b4-6d3c-11ec-a97e-000c2992a0d6@osd.x.service
#ceph osd crush rm osd.x
#ceph osd rm osd.x
#ceph auth rm osd.x
#cd /var/lib/ceph/d813d6b4-6d3c-11ec-a97e-000c2992a0d6/
#rm osd.x -rf
# cd /etc/systemd/system/ceph-d813d6b4-6d3c-11ec-a97e-000c2992a0d6.target.wants/
# rm ceph-d813d6b4-6d3c-11ec-a97e-000c2992a0d6@osd.x.service
#lvremove /dev/ceph-*
But when I remove the LVM volumes that belonged to the deleted OSDs, the deleted OSDs come back automatically. I don't want that to happen; I want to create OSDs on the disks manually (see the sketch after the spec output below). Can anyone explain this? The output of ceph orch ls --export --format yaml is below:
[root@ceph2-node-01 ~]# ceph orch ls --export --format yaml
service_type: alertmanager
service_name: alertmanager
placement:
  count: 3
  label: mon
---
service_type: crash
service_name: crash
placement:
  host_pattern: '*'
--- !!python/object:ceph.deployment.service_spec.MonitoringSpec
config: null
networks: []
placement: !!python/object:ceph.deployment.service_spec.PlacementSpec
  count: 3
  count_per_host: null
  host_pattern: null
  hosts:
  - !!python/object/new:ceph.deployment.service_spec.HostPlacementSpec
    - ceph2-node-02
    - ''
    - ''
  - !!python/object/new:ceph.deployment.service_spec.HostPlacementSpec
    - ceph2-node-03
    - ''
    - ''
  label: null
port: null
preview_only: false
service_id: null
service_type: grafana
unmanaged: false
---
service_type: mgr
service_name: mgr
placement:
  count: 2
---
service_type: mon
service_name: mon
placement:
  count: 5
--- !!python/object:ceph.deployment.service_spec.MonitoringSpec
config: null
networks: []
placement: !!python/object:ceph.deployment.service_spec.PlacementSpec
  count: null
  count_per_host: null
  host_pattern: '*'
  hosts: []
  label: null
port: null
preview_only: false
service_id: null
service_type: node-exporter
unmanaged: false
---
service_type: osd
service_id: all-available-devices
service_name: osd.all-available-devices
placement:
  host_pattern: '*'
unmanaged: true
spec:
  data_devices:
    all: true
  filter_logic: AND
  objectstore: bluestore
---
service_type: osd
service_id: dashboard-admin-1642344788791
service_name: osd.dashboard-admin-1642344788791
placement:
  host_pattern: '*'
spec:
  data_devices:
    rotational: true
  db_devices:
    rotational: false
  db_slots: 2
  filter_logic: AND
  objectstore: bluestore
--- !!python/object:ceph.deployment.service_spec.MonitoringSpec
config: null
networks: []
placement: !!python/object:ceph.deployment.service_spec.PlacementSpec
  count: 3
  count_per_host: null
  host_pattern: null
  hosts: []
  label: mon
port: null
preview_only: false
service_id: null
service_type: prometheus
unmanaged: false
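For the manual part, this is roughly what I have in mind, assuming both OSD specs stay unmanaged (the device path below is a placeholder):

#ceph orch daemon add osd ceph2-node-01:/dev/sdX

As far as I understand, with every OSD spec unmanaged, cephadm should only create an OSD when it is told to explicitly like this.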
[Discussion]:
-
Almost all of the manual steps for removing an OSD can be handled by cephadm, since it looks like you deployed this cluster with cephadm. See docs.ceph.com/en/latest/cephadm/services/osd/#remove-an-osd for more details.
What is the output of ceph orch ls --export --format yaml?
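For reference, the cephadm-driven removal is roughly the sketch below (OSD id 3 and the device path are placeholders; check the linked docs for the exact options available in your release):

#ceph orch osd rm 3                                     # drains the OSD, then stops and purges it
#ceph orch osd rm status                                # shows the progress of the scheduled removals
#ceph orch device zap ceph2-node-01 /dev/sdX --force    # wipes the freed device, including its LVM volumes

Keep in mind that zapping frees the device, so any OSD spec that is still managed and matches that device may immediately recreate an OSD on it.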
-
Yes, I deployed my cluster with cephadm. I checked that link, but it did not help with the problem. I have added the command output to the question.
-
You seem to have two different OSD specs, and you only disabled one of them. Is the service with ID dashboard-admin-1642344788791 there on purpose, or is it experimental? Remove it or disable it, then try again.
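If you want to keep the spec but stop it from creating OSDs, a minimal sketch of marking it unmanaged (the file name is just a placeholder):

#ceph orch ls osd --export > osd-specs.yaml
(edit osd-specs.yaml and add "unmanaged: true" to the osd.dashboard-admin-1642344788791 section)
#ceph orch apply -i osd-specs.yaml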
-
I didn't enable it; it is there by default. How can I disable it?
-
What does this service (dashboard-admin) do? I couldn't find any resources on it.
Tags: ceph object-storage cephadm