Ceph Luminous introduces the CRUSH device class feature: each OSD can be tagged with a class, and when you create a CRUSH rule you simply specify the class it should use.

Test environment

    # cat /etc/redhat-release
    CentOS Linux release 7.5.1804 (Core)
    # ceph --version
    ceph version 12.2.9 (9e300932ef8a8916fb3fda78c58691a6ab0f4217) luminous (stable)

The test environment has no real SSD disks, so this walkthrough pretends that each host has one SSD and manually changes the class label of the corresponding OSD.

Modifying the CRUSH class

1. Check the current OSD layout
    # ceph osd tree
    ID  CLASS WEIGHT  TYPE NAME           STATUS REWEIGHT PRI-AFF
    -1        0.74797 root default
    -15       0.21999     rack rack1
    -9        0.08800         host node1
    3     hdd 0.04399             osd.3       up  1.00000 1.00000
    10    hdd 0.04399             osd.10      up  0.98001 1.00000
    11    hdd 0.04399             osd.11      up  0.96002 1.00000
    -3        0.13199         host storage-0
    4     hdd 0.04399             osd.4       up  0.96002 1.00000
    5     hdd 0.04399             osd.5       up  0.96002 1.00000
    14    hdd 0.04399             osd.14      up  0.98001 1.00000
    -16       0.26399     rack rack2
    -5        0.13199         host node2
    0     hdd 0.04399             osd.0       up  0.98628 1.00000
    6     hdd 0.04399             osd.6       up  1.00000 1.00000
    16    hdd 0.04399             osd.16      up  1.00000 1.00000
    -7        0.13199         host storage-1
    2     hdd 0.04399             osd.2       up  1.00000 1.00000
    8     hdd 0.04399             osd.8       up  1.00000 1.00000
    12    hdd 0.04399             osd.12      up  1.00000 1.00000
    -17       0.26399     rack rack3
    -11       0.13199         host node3
    1     hdd 0.04399             osd.1       up  1.00000 1.00000
    7     hdd 0.04399             osd.7       up  1.00000 1.00000
    15    hdd 0.04399             osd.15      up  1.00000 1.00000
    -13       0.13199         host storage-2
    9     hdd 0.04399             osd.9       up  1.00000 1.00000
    13    hdd 0.04399             osd.13      up  1.00000 1.00000
    17    hdd 0.04399             osd.17      up  0.96002 1.00000
2. List the cluster's current CRUSH classes
    # ceph osd crush class ls
    [
        "hdd",
    ]
3. Remove the class from osd.0, osd.1, osd.2, osd.3, osd.4 and osd.9 (an OSD's existing class has to be removed before a different one can be set)
    # for i in 0 1 2 3 4 9 ; do ceph osd crush rm-device-class osd.$i ; done
    done removing class of osd(s): 0
    done removing class of osd(s): 1
    done removing class of osd(s): 2
    done removing class of osd(s): 3
    done removing class of osd(s): 4
    done removing class of osd(s): 9
    # ceph osd tree
    ID  CLASS WEIGHT  TYPE NAME           STATUS REWEIGHT PRI-AFF
    -1        0.74797 root default
    -15       0.21999     rack rack1
    -9        0.08800         host node1
    3         0.04399             osd.3       up  1.00000 1.00000
    10    hdd 0.04399             osd.10      up  0.98001 1.00000
    11    hdd 0.04399             osd.11      up  0.96002 1.00000
    -3        0.13199         host storage-0
    4         0.04399             osd.4       up  0.96002 1.00000
    5     hdd 0.04399             osd.5       up  0.96002 1.00000
    14    hdd 0.04399             osd.14      up  0.98001 1.00000
    -16       0.26399     rack rack2
    -5        0.13199         host node2
    0         0.04399             osd.0       up  0.98628 1.00000
    6     hdd 0.04399             osd.6       up  1.00000 1.00000
    16    hdd 0.04399             osd.16      up  1.00000 1.00000
    -7        0.13199         host storage-1
    2         0.04399             osd.2       up  1.00000 1.00000
    8     hdd 0.04399             osd.8       up  1.00000 1.00000
    12    hdd 0.04399             osd.12      up  1.00000 1.00000
    -17       0.26399     rack rack3
    -11       0.13199         host node3
    1         0.04399             osd.1       up  1.00000 1.00000
    7     hdd 0.04399             osd.7       up  1.00000 1.00000
    15    hdd 0.04399             osd.15      up  1.00000 1.00000
    -13       0.13199         host storage-2
    9         0.04399             osd.9       up  1.00000 1.00000
    13    hdd 0.04399             osd.13      up  1.00000 1.00000
    17    hdd 0.04399             osd.17      up  0.96002 1.00000
4. Set the class of osd.0, osd.1, osd.2, osd.3, osd.4 and osd.9 to ssd
    # for i in 0 1 2 3 4 9 ; do ceph osd crush set-device-class ssd osd.$i ; done
    set osd(s) 0 to class 'ssd'
    set osd(s) 1 to class 'ssd'
    set osd(s) 2 to class 'ssd'
    set osd(s) 3 to class 'ssd'
    set osd(s) 4 to class 'ssd'
    set osd(s) 9 to class 'ssd'
    # ceph osd tree
    ID  CLASS WEIGHT  TYPE NAME           STATUS REWEIGHT PRI-AFF
    -1        0.74797 root default
    -15       0.21999     rack rack1
    -9        0.08800         host node1
    10    hdd 0.04399             osd.10      up  0.98001 1.00000
    11    hdd 0.04399             osd.11      up  0.96002 1.00000
    3     ssd 0.04399             osd.3       up  1.00000 1.00000
    -3        0.13199         host storage-0
    5     hdd 0.04399             osd.5       up  0.96002 1.00000
    14    hdd 0.04399             osd.14      up  0.98001 1.00000
    4     ssd 0.04399             osd.4       up  0.96002 1.00000
    -16       0.26399     rack rack2
    -5        0.13199         host node2
    6     hdd 0.04399             osd.6       up  1.00000 1.00000
    16    hdd 0.04399             osd.16      up  1.00000 1.00000
    0     ssd 0.04399             osd.0       up  0.98628 1.00000
    -7        0.13199         host storage-1
    8     hdd 0.04399             osd.8       up  1.00000 1.00000
    12    hdd 0.04399             osd.12      up  1.00000 1.00000
    2     ssd 0.04399             osd.2       up  1.00000 1.00000
    -17       0.26399     rack rack3
    -11       0.13199         host node3
    7     hdd 0.04399             osd.7       up  1.00000 1.00000
    15    hdd 0.04399             osd.15      up  1.00000 1.00000
    1     ssd 0.04399             osd.1       up  1.00000 1.00000
    -13       0.13199         host storage-2
    13    hdd 0.04399             osd.13      up  1.00000 1.00000
    17    hdd 0.04399             osd.17      up  0.96002 1.00000
    9     ssd 0.04399             osd.9       up  1.00000 1.00000
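Note: an OSD normally re-derives its device class from the detected drive type when it starts, so the hand-assigned ssd labels faked here may not survive an OSD restart. If the manual assignment needs to stick, the osd_class_update_on_start option can be turned off; a minimal sketch of the ceph.conf change on the OSD hosts (an assumption for this lab setup, not a step the original walkthrough performed):

    [osd]
    osd_class_update_on_start = false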
5. List the CRUSH classes again; an ssd class now appears
    # ceph osd crush class ls
    [
        "hdd",
        "ssd"
    ]
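Besides listing the class names, Luminous can also list the members of a class, which is a quick way to confirm that exactly the six OSDs changed above now carry the ssd label (assuming the ls-osd subcommand is present in your build):

    # ceph osd crush class ls-osd ssd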

6. Create a CRUSH rule that places data on ssd-class OSDs

    # ceph osd crush rule create-replicated rule-ssd default rack ssd
    # ceph osd crush rule dump rule-ssd
    {
        "rule_id": 1,
        "rule_name": "rule-ssd",
        "ruleset": 1,
        "type": 1,
        "min_size": 1,
        "max_size": 10,
        "steps": [
            {
                "op": "take",
                "item": -30,
                "item_name": "default~ssd"
            },
            {
                "op": "chooseleaf_firstn",
                "num": 0,
                "type": "rack"
            },
            {
                "op": "emit"
            }
        ]
    }
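The general form of the command is ceph osd crush rule create-replicated <rule-name> <root> <failure-domain-type> <device-class>. A companion rule that keeps other pools on the remaining hdd-class OSDs could be created the same way; rule-hdd below is just an illustrative name, not part of the walkthrough:

    # ceph osd crush rule create-replicated rule-hdd default rack hdd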

Verification

Method 1:
1. Get the CRUSH map
    # ceph osd getcrushmap -o monmap
    60
2. Decompile the CRUSH map
    # crushtool -d monmap -o monmap.txt
    # cat monmap.txt
    …………
    rule rule-ssd {
        id 1
        type replicated
        min_size 1
        max_size 10
        step take default class ssd
        step chooseleaf firstn 0 type rack
        step emit
    }

As shown above, the CRUSH map now contains an additional rule named rule-ssd.
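If the decompiled text were edited by hand, the reverse path would be to recompile it with crushtool and inject it back into the cluster. That round trip is not needed here, since create-replicated already updated the live map, but for reference:

    # crushtool -c monmap.txt -o monmap.new
    # ceph osd setcrushmap -i monmap.new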

3. Test the rule
    # crushtool -i monmap --test --min-x 0 --max-x 9 --num-rep 3 --ruleset 1 --show_mappings
    CRUSH rule 1 x 0 [3,2,9]
    CRUSH rule 1 x 1 [2,4,9]
    CRUSH rule 1 x 2 [1,4,0]
    CRUSH rule 1 x 3 [9,0,3]
    CRUSH rule 1 x 4 [2,9,3]
    CRUSH rule 1 x 5 [1,2,4]
    CRUSH rule 1 x 6 [1,3,0]
    CRUSH rule 1 x 7 [1,0,4]
    CRUSH rule 1 x 8 [0,4,1]
    CRUSH rule 1 x 9 [0,1,3]

In all 10 test mappings, every replica lands on one of the OSDs whose class was set to ssd above.
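For contrast, the same test can be run against the default replicated rule (ruleset 0); its mappings should also include hdd-class OSD ids, confirming that the restriction really comes from rule-ssd rather than from crushtool itself:

    # crushtool -i monmap --test --min-x 0 --max-x 9 --num-rep 3 --ruleset 0 --show_mappings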

Method 2:
1. Create a pool named ssdtest and set its CRUSH rule to rule-ssd
    # ceph osd pool create ssdtest 64 64 rule-ssd
    pool 'ssdtest' created
    # ceph osd pool get ssdtest crush_rule
    crush_rule: rule-ssd
2. Upload an object
    # rados -p ssdtest put init.txt init.sh
3. Look up the OSDs that hold the object
    # ceph osd map ssdtest init.txt
    osdmap e286 pool 'ssdtest' (10) object 'init.txt' -> pg 10.66387d99 (10.19) -> up ([9,2,3], p9) acting ([9,2,3], p9)

The mapping shows that the object's OSDs ([9,2,3]) are all ssd-class OSDs.
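An existing pool can also be moved onto the ssd-class OSDs later by switching its CRUSH rule, which triggers data migration for that pool; a sketch, using the hypothetical pool name mypool:

    # ceph osd pool set mypool crush_rule rule-ssd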