ceph-nvmeof
ceph-nvmeof copied to clipboard
get_io_stats fails to display IO statistics.
Get IO stats displays the IO metrics correctly when queried through the IP of the actively connected gateway; when queried through the other gateways, many metrics (such as bytes_written) are "0".
Steps-to-follow:
- Deploy service with gateways (GW1 : LB-grp-id: 1, GW2: LB-grp-id: 2)
- Configure 2 subsystems
- Add both gateway listeners on each subsystem.
- Setup host, Add namespaces like below
- Subsystem-1 Namespaces tagged with LB-grp-id-1
- Subsystem-2 Namespaces tagged with LB-grp-id-2
- Start IO.
- Get IO stats for subsystem-1 namespaces from GW2; we can then observe that "bytes_written" is "0".
[root@ceph-sunilkumar-00-7v6gmf-node6 ~]# podman run --quiet --rm quay.io/barakda1/nvmeof-cli:1.2.1 --format json --server-address 10.0.208.84 --server-port 5500 get_subsystems
Get subsystems:
{
"subsystems": [
{
"nqn": "nqn.2016-06.io.spdk:cnode1",
"subtype": "NVMe",
"listen_addresses": [
{
"trtype": "TCP",
"adrfam": "IPv4",
"traddr": "10.0.208.84",
"trsvcid": "4420",
"transport": "TCP"
}
],
"allow_any_host": true,
"serial_number": "Ceph67140833472324",
"model_number": "Ceph bdev Controller",
"max_namespaces": 32,
"min_cntlid": 2041,
"max_cntlid": 4080,
"namespaces": [
{
"nsid": 1,
"name": "bdev_e42aafab-8f9b-4911-b326-4308cc962112",
"bdev_name": "bdev_e42aafab-8f9b-4911-b326-4308cc962112",
"nguid": "E42AAFAB8F9B4911B3264308CC962112",
"uuid": "e42aafab-8f9b-4911-b326-4308cc962112",
"anagrpid": 2,
"nonce": "10.0.208.84:0/3145406409"
},
{
"nsid": 2,
"name": "bdev_b0f9bf47-925a-4bcb-8e93-3749a98d6e2f",
"bdev_name": "bdev_b0f9bf47-925a-4bcb-8e93-3749a98d6e2f",
"nguid": "B0F9BF47925A4BCB8E933749A98D6E2F",
"uuid": "b0f9bf47-925a-4bcb-8e93-3749a98d6e2f",
"anagrpid": 2,
"nonce": "10.0.208.84:0/3145406409"
}
],
"hosts": []
},
{
"nqn": "nqn.2016-06.io.spdk:cnode2",
"subtype": "NVMe",
"listen_addresses": [
{
"trtype": "TCP",
"adrfam": "IPv4",
"traddr": "10.0.208.84",
"trsvcid": "4420",
"transport": "TCP"
}
],
"allow_any_host": true,
"serial_number": "Ceph5822363922143",
"model_number": "Ceph bdev Controller",
"max_namespaces": 32,
"min_cntlid": 2041,
"max_cntlid": 4080,
"namespaces": [
{
"nsid": 1,
"name": "bdev_e7721ff0-5d0e-4c9c-b703-6553aafc945a",
"bdev_name": "bdev_e7721ff0-5d0e-4c9c-b703-6553aafc945a",
"nguid": "E7721FF05D0E4C9CB7036553AAFC945A",
"uuid": "e7721ff0-5d0e-4c9c-b703-6553aafc945a",
"anagrpid": 1,
"nonce": "10.0.208.84:0/3359539278"
},
{
"nsid": 2,
"name": "bdev_35c56671-2204-42a9-9913-f51f77b65f60",
"bdev_name": "bdev_35c56671-2204-42a9-9913-f51f77b65f60",
"nguid": "35C56671220442A99913F51F77B65F60",
"uuid": "35c56671-2204-42a9-9913-f51f77b65f60",
"anagrpid": 1,
"nonce": "10.0.208.84:0/3359539278"
}
],
"hosts": []
}
]
}
Listeners are added
[root@ceph-sunilkumar-00-7v6gmf-node6 ~]# podman run --quiet --rm quay.io/barakda1/nvmeof-cli:1.2.1 --format json --server-address 10.0.208.84 --server-port 5500 listener list -n nqn.2016-06.io.spdk:cnode2
{
"error_message": "Success",
"listeners": [
{
"host_name": "ceph-sunilkumar-00-7v6gmf-node6",
"trtype": "TCP",
"traddr": "10.0.208.84",
"trsvcid": 4420,
"adrfam": "ipv4"
},
{
"host_name": "ceph-sunilkumar-00-7v6gmf-node7",
"trtype": "TCP",
"traddr": "10.0.209.23",
"trsvcid": 4420,
"adrfam": "ipv4"
}
],
"status": 0
}
[root@ceph-sunilkumar-00-7v6gmf-node6 ~]# podman run --quiet --rm quay.io/barakda1/nvmeof-cli:1.2.1 --format json --server-address 10.0.208.84 --server-port 5500 listener list -n nqn.2016-06.io.spdk:cnode1
{
"error_message": "Success",
"listeners": [
{
"host_name": "ceph-sunilkumar-00-7v6gmf-node6",
"trtype": "TCP",
"traddr": "10.0.208.84",
"trsvcid": 4420,
"adrfam": "ipv4"
},
{
"host_name": "ceph-sunilkumar-00-7v6gmf-node7",
"trtype": "TCP",
"traddr": "10.0.209.23",
"trsvcid": 4420,
"adrfam": "ipv4"
}
],
"status": 0
}
Get IO stats
[root@ceph-sunilkumar-00-7v6gmf-node6 ~]# podman run --quiet --rm quay.io/barakda1/nvmeof-cli:1.2.1 --format json --server-address 10.0.208.84 --server-port 5500 namespace get_io_stats --subsystem nqn.2016-06.io.spdk:cnode1 --nsid 1
{
"error_message": "Success",
"subsystem_nqn": "nqn.2016-06.io.spdk:cnode1",
"nsid": 1,
"uuid": "e42aafab-8f9b-4911-b326-4308cc962112",
"bdev_name": "bdev_e42aafab-8f9b-4911-b326-4308cc962112",
"tick_rate": "2290000000",
"ticks": "2621647473989225",
"bytes_read": "2183168",
"num_read_ops": "102",
"bytes_written": "113770496",
"num_write_ops": "868",
"read_latency_ticks": "127828488",
"max_read_latency_ticks": "5130384",
"min_read_latency_ticks": "570238",
"write_latency_ticks": "238402928",
"max_write_latency_ticks": "65301792",
"min_write_latency_ticks": "79444",
"status": 0,
"bytes_unmapped": "0",
"num_unmap_ops": "0",
"unmap_latency_ticks": "0",
"max_unmap_latency_ticks": "0",
"min_unmap_latency_ticks": "0",
"copy_latency_ticks": "0",
"max_copy_latency_ticks": "0",
"min_copy_latency_ticks": "0",
"io_error": []
}
[root@ceph-sunilkumar-00-7v6gmf-node6 ~]# podman run --quiet --rm quay.io/barakda1/nvmeof-cli:1.2.1 --format json --server-address 10.0.209.23 --server-port 5500 namespace get_io_stats --subsystem nqn.2016-06.io.spdk:cnode1 --nsid 1
{
"error_message": "Success",
"subsystem_nqn": "nqn.2016-06.io.spdk:cnode1",
"nsid": 1,
"uuid": "e42aafab-8f9b-4911-b326-4308cc962112",
"bdev_name": "bdev_e42aafab-8f9b-4911-b326-4308cc962112",
"tick_rate": "2190000000",
"ticks": "2507761566245463",
"bytes_read": "36864",
"num_read_ops": "2",
"read_latency_ticks": "8553886",
"max_read_latency_ticks": "4332226",
"min_read_latency_ticks": "4221660",
"status": 0,
"bytes_written": "0",
"num_write_ops": "0",
"bytes_unmapped": "0",
"num_unmap_ops": "0",
"write_latency_ticks": "0",
"max_write_latency_ticks": "0",
"min_write_latency_ticks": "0",
"unmap_latency_ticks": "0",
"max_unmap_latency_ticks": "0",
"min_unmap_latency_ticks": "0",
"copy_latency_ticks": "0",
"max_copy_latency_ticks": "0",
"min_copy_latency_ticks": "0",
"io_error": []
}
Why is this an issue? The stats come from the SPDK of the gateway, so if the namespace is not, or has never been, active on that gateway (SPDK), then I would expect them to be 0.
@sunilkumarn417 if this makes sense, please close.
@pcuzner Thanks, I understand that the SPDK of the gateway provides this.
But the question is: as a user, which command should I use to look at these metrics, in both failover and no-failover cases?
@sunilkumarn417 as a user, you need to check that the IO continues when one of the nodes is not available. There are means on the host to check if IO exists to namespaces.