kaldiio icon indicating copy to clipboard operation
kaldiio copied to clipboard

Update readme to include `kaldi_native_io`

Open csukuangfj opened this issue 2 years ago • 0 comments

See https://github.com/csukuangfj/kaldi_native_io

kaldi_native_io is implemented in C++ and exposed to Python through bindings.

Example usage

Read/write kaldi::Matrix<float>

import numpy as np

import kaldi_native_io

# Common file stem for the archive (.ark) and script (.scp) files used below.
base = "float_matrix"
# Write specifier naming both {base}.ark and {base}.scp.
# NOTE(review): the "t" option presumably selects text mode, following
# Kaldi's table-specifier convention -- confirm against the Kaldi I/O docs.
wspecifier = f"ark,scp,t:{base}.ark,{base}.scp"
# Read specifier pointing at the script file named in wspecifier.
rspecifier = f"scp:{base}.scp"


def test_float_matrix_writer():
    """Write two float32 matrices under keys "a" and "b" to ``wspecifier``.

    Both supported write forms are exercised: the explicit ``write`` method
    and item assignment.
    """
    matrix_a = np.array([[1, 2], [3, 4]], dtype=np.float32)
    matrix_b = np.array([[10, 20, 30], [40, 50, 60]], dtype=np.float32)

    with kaldi_native_io.FloatMatrixWriter(wspecifier) as writer:
        writer.write("a", matrix_a)
        writer["b"] = matrix_b


def test_sequential_float_matrix_reader():
    """Iterate over ``rspecifier`` and verify every stored float matrix.

    The files named by ``rspecifier`` must already exist (they are the ones
    ``test_float_matrix_writer`` writes).

    Raises:
        ValueError: if the reader yields a key other than "a" or "b".
        AssertionError: if a matrix differs from the expected value, or if
            an expected key is never yielded.
    """
    expected = {
        "a": np.array([[1, 2], [3, 4]], dtype=np.float32),
        "b": np.array([[10, 20, 30], [40, 50, 60]], dtype=np.float32),
    }
    seen = set()

    with kaldi_native_io.SequentialFloatMatrixReader(rspecifier) as ki:
        for key, value in ki:
            if key not in expected:
                raise ValueError(f"Unknown key {key} with value {value}")
            assert np.array_equal(value, expected[key])
            seen.add(key)

    # Without this check an empty or truncated table would pass silently,
    # because the loop body would simply never run for the missing keys.
    missing = sorted(set(expected) - seen)
    assert not missing, f"Keys never read: {missing}"


def test_random_access_float_matrix_reader():
    """Look up both float matrices by key and compare them to known values."""
    want_a = np.array([[1, 2], [3, 4]], dtype=np.float32)
    want_b = np.array([[10, 20, 30], [40, 50, 60]], dtype=np.float32)

    with kaldi_native_io.RandomAccessFloatMatrixReader(rspecifier) as reader:
        # Membership checks first ("b", then "a"), followed by the contents.
        for key in ("b", "a"):
            assert key in reader
        assert np.array_equal(reader["a"], want_a)
        assert np.array_equal(reader["b"], want_b)

Read/write std::vector<int>

import kaldi_native_io

# Common file stem for the archive (.ark) and script (.scp) files used below.
base = "int32_vector"
# Write specifier naming both {base}.ark and {base}.scp.
# NOTE(review): the "t" option presumably selects text mode, following
# Kaldi's table-specifier convention -- confirm against the Kaldi I/O docs.
wspecifier = f"ark,scp,t:{base}.ark,{base}.scp"
# Read specifier pointing at the script file named in wspecifier.
rspecifier = f"scp:{base}.scp"


def test_int32_vector_writer():
    """Write two integer lists under keys "a" and "b" to ``wspecifier``.

    Both supported write forms are exercised: the explicit ``write`` method
    and item assignment.
    """
    vector_a = [10, 20]
    vector_b = [100, 200, 300]

    with kaldi_native_io.Int32VectorWriter(wspecifier) as writer:
        writer.write("a", vector_a)
        writer["b"] = vector_b


def test_sequential_int32_vector_reader():
    """Iterate over ``rspecifier`` and verify every stored integer vector.

    The files named by ``rspecifier`` must already exist (they are the ones
    ``test_int32_vector_writer`` writes).

    Raises:
        ValueError: if the reader yields a key other than "a" or "b".
        AssertionError: if a vector differs from the expected value, or if
            an expected key is never yielded.
    """
    expected = {
        "a": [10, 20],
        "b": [100, 200, 300],
    }
    seen = set()

    with kaldi_native_io.SequentialInt32VectorReader(rspecifier) as ki:
        for key, value in ki:
            if key not in expected:
                raise ValueError(f"Unknown key {key} with value {value}")
            assert value == expected[key]
            seen.add(key)

    # Without this check an empty or truncated table would pass silently,
    # because the loop body would simply never run for the missing keys.
    missing = sorted(set(expected) - seen)
    assert not missing, f"Keys never read: {missing}"


def test_random_access_int32_vector_reader():
    """Look up both integer vectors by key and compare them to known values."""
    want_a = [10, 20]
    want_b = [100, 200, 300]

    with kaldi_native_io.RandomAccessInt32VectorReader(rspecifier) as reader:
        # Membership checks first ("b", then "a"), followed by the contents.
        for key in ("b", "a"):
            assert key in reader
        assert reader["a"] == want_a
        assert reader["b"] == want_b

The interface is rather uniform:

  • For writing, use XxxWriter(wspecifier)
  • For sequential reading, use SequentialXxxReader(rspecifier)
  • For random-access reading, use RandomAccessXxxReader(rspecifier)

More usage examples can be found at https://github.com/csukuangfj/kaldi_native_io/tree/master/kaldi_native_io/python/tests

csukuangfj avatar Feb 11 '22 11:02 csukuangfj