# managing ceph
|
|
|
|
Always refer back to the official documentation at https://docs.ceph.com/en/latest
|
|
|
|
## adding new cephfs
|
|
- create an erasure code profile, which allows you to customise the RAID-equivalent redundancy level
|
|
- raid5 with 3 disks? k=2,m=1
|
|
- raid5 with 6 disks? k=5,m=1
|
|
- raid6 with 4 disks? k=2,m=2, etc
|
|
- create osd pool using custom profile for data
|
|
- create osd pool using default replicated profile for metadata
|
|
- enable ec_overwrites for the data pool
|
|
- create the ceph fs volume using data/metadata pools
|
|
- set ceph fs settings
|
|
- set the number of active metadata servers (mds) via max_mds
|
|
- set fs to be for bulk data
|
|
- set mds fast failover with standby-replay
|
|
|
|
|
|
```
|
|
sudo ceph osd erasure-code-profile set ec_4_1 k=4 m=1
|
|
sudo ceph osd pool create media_data 128 erasure ec_4_1
|
|
sudo ceph osd pool create media_metadata 32 replicated_rule
|
|
sudo ceph osd pool set media_data allow_ec_overwrites true
|
|
sudo ceph osd pool set media_data bulk true
|
|
sudo ceph fs new mediafs media_metadata media_data --force
|
|
sudo ceph fs set mediafs allow_standby_replay true
|
|
sudo ceph fs set mediafs max_mds 2
|
|
```
|
|
|
|
## creating authentication tokens
|
|
|
|
- this will create a client keyring named media
|
|
- this client will have the following capabilities:
|
|
- mon: read
|
|
- mds:
|
|
- read /
|
|
- read/write /media
|
|
- read/write /common
|
|
- osd: read/write to the cephfs_data pool (note: the pools created above are media_data/media_metadata — adjust the pool name to match your cluster)
|
|
|
|
```
|
|
sudo ceph auth get-or-create client.media \
|
|
mon 'allow r' \
|
|
mds 'allow r path=/, allow rw path=/media, allow rw path=/common' \
|
|
osd 'allow rw pool=cephfs_data'
|
|
```
|
|
|
|
## list the authentication tokens and permissions
|
|
|
|
ceph auth ls
|
|
|
|
## change the capabilities of a token
|
|
|
|
this replaces (does not merge with) the current capabilities of the given client user
|
|
|
|
sudo ceph auth caps client.media \
|
|
mon 'allow r' \
|
|
mds 'allow rw path=/' \
|
|
osd 'allow rw pool=media_data'
|
|
|
|
## adding a new osd on new node
|
|
|
|
create the ceph conf on the new node (TODO: automate this)
|
|
|
|
cat <<EOF | sudo tee /etc/ceph/ceph.conf
|
|
[global]
|
|
auth_client_required = cephx
|
|
auth_cluster_required = cephx
|
|
auth_service_required = cephx
|
|
fsid = de96a98f-3d23-465a-a899-86d3d67edab8
|
|
mon_allow_pool_delete = true
|
|
mon_initial_members = prodnxsr0009,prodnxsr0010,prodnxsr0011,prodnxsr0012,prodnxsr0013
|
|
mon_host = 198.18.23.9,198.18.23.10,198.18.23.11,198.18.23.12,198.18.23.13
|
|
ms_bind_ipv4 = true
|
|
ms_bind_ipv6 = false
|
|
osd_crush_chooseleaf_type = 1
|
|
osd_pool_default_min_size = 2
|
|
osd_pool_default_size = 3
|
|
osd_pool_default_pg_num = 128
|
|
public_network = 198.18.23.1/32,198.18.23.2/32,198.18.23.3/32,198.18.23.4/32,198.18.23.5/32,198.18.23.6/32,198.18.23.7/32,198.18.23.8/32,198.18.23.9/32,198.18.23.10/32,198.18.23.11/32,198.18.23.12/32,198.18.23.13/32
|
|
EOF
|
|
|
|
from one of the monitor hosts, push the required keyrings to the new node over ssh
|
|
|
|
sudo cat /etc/ceph/ceph.client.admin.keyring | ssh prodnxsr0003 'sudo tee /etc/ceph/ceph.client.admin.keyring'
|
|
sudo cat /var/lib/ceph/bootstrap-osd/ceph.keyring | ssh prodnxsr0003 'sudo tee /var/lib/ceph/bootstrap-osd/ceph.keyring'
|
|
|
|
assuming we are adding /dev/sda to the cluster, first zap the disk to remove partitions/lvm/metadata
|
|
|
|
sudo ceph-volume lvm zap /dev/sda --destroy
|
|
|
|
then add it to the cluster
|
|
|
|
sudo ceph-volume lvm create --data /dev/sda
|
|
|
|
## removing an osd
|
|
|
|
check which OSD IDs were hosted on this node (if you know the hostname)
|
|
|
|
sudo ceph osd tree
|
|
|
|
or check for any DOWN osds
|
|
|
|
sudo ceph osd stat
|
|
sudo ceph health detail
|
|
|
|
once you have identified the old OSD ID, remove it with the following steps, replacing X with the actual OSD ID:
|
|
|
|
sudo ceph osd out osd.X
|
|
sudo ceph osd down osd.X
|
|
sudo ceph osd crush remove osd.X
|
|
sudo ceph auth del osd.X
|
|
sudo ceph osd rm osd.X
|
|
|
|
|
|
## maintenance mode for the cluster
|
|
|
|
from any one node in the cluster, disable recovery/rebalancing and pause client I/O
|
|
|
|
sudo ceph osd set noout
|
|
sudo ceph osd set nobackfill
|
|
sudo ceph osd set norecover
|
|
sudo ceph osd set norebalance
|
|
sudo ceph osd set nodown
|
|
sudo ceph osd set pause
|
|
|
|
to restore normal operation after maintenance, unset each flag
|
|
|
|
sudo ceph osd unset noout
|
|
sudo ceph osd unset nobackfill
|
|
sudo ceph osd unset norecover
|
|
sudo ceph osd unset norebalance
|
|
sudo ceph osd unset nodown
|
|
sudo ceph osd unset pause
|