Failed to customize PaddleSpeech service for PaddleHub
env:
OS:
- Windows 10
Python:
- 3.8.10
Requirements:
- paddlepaddle==2.4.2
- paddlenlp==2.5.2
- paddlehub==2.3.1
- paddlespeech==1.4.1
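A quick way to double-check that these versions are the ones the interpreter actually picks up (assuming the packages were installed under these pip distribution names):

# Print the installed versions of the packages listed above.
from importlib.metadata import version

for pkg in ("paddlepaddle", "paddlenlp", "paddlehub", "paddlespeech"):
    print(pkg, version(pkg))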
File Structure:
module.py:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import base64
import io
import sys
import time
import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable, serving
from paddlehub.utils.log import logger
# from paddlespeech.cli.asr.infer import ASRExecutor
from paddlespeech.server.engine.asr.python.asr_engine import ASREngine
from paddlespeech.server.engine.asr.python.asr_engine import PaddleASRConnectionHandler
from yacs.config import CfgNode
def base64_to_audio(b64str) -> bytes:
    data = base64.b64decode(b64str.encode('utf8'))
    # return io.BytesIO(data)
    return data
@moduleinfo(
    name="ASR",
    version="1.0.0",
    summary="A custom PaddleHub module for Automatic Speech Recognition. Powered by Mercedes-Benz RDCP/SI Team.",
    author="ruitian",
    author_email="",
    type="audio/asr"
)
class ASR(hub.Module):
    def _initialize(self):
        _config = dict()
        _config['device'] = 'gpu'
        _config['model'] = 'conformer_online_aishell'
        _config['lang'] = 'zh'
        _config['sample_rate'] = 16000
        _config['cfg_path'] = None
        _config['decode_method'] = 'attention_rescoring'
        _config['ckpt_path'] = None
        _config['force_yes'] = False
        config = CfgNode(_config)
        asr_engine = ASREngine()
        asr_engine.init(config)
        self.connection_handler = PaddleASRConnectionHandler(asr_engine)
    @serving
    def speech_recognize(self, audios, **kwargs):
        def predict(audios, **kwargs):
            res = []
            if isinstance(audios, io.BytesIO):
                # if not self.asr._check(audios, sample_rate=16000):
                #     sys.exit(-1)
                # self.asr.preprocess(self.model, audios)
                # self.asr.infer(self.model)
                # _result = self.asr.postprocess()
                self.connection_handler.run(audios)
                asr_results = self.connection_handler.postprocess()
                res.append(asr_results)
            elif isinstance(audios, list):
                for audio in audios:
                    # if not self.asr._check(audio, sample_rate=16000):
                    #     sys.exit(-1)
                    # self.asr.preprocess(self.model, audio)
                    # self.asr.infer(self.model)
                    self.connection_handler.run(audio)
                    asr_results = self.connection_handler.postprocess()
                    res.append(asr_results)
            else:
                raise RuntimeError(
                    'Unsupported audio input for serving; make sure the input is base64-encoded audio file data.')
            return res

        audio_decode = [base64_to_audio(audio) for audio in audios]
        starttime = time.time()
        results = predict(audio_decode, **kwargs)
        elapse = time.time() - starttime
        logger.info("Predict time: {}".format(elapse))
        return [{"results": results, "elapse": elapse}]
if __name__ == '__main__':
    lib = ASR()
    lib._initialize()

    def readwav2base64(wav_file):
        """
        Read a wave file and convert it to a base64 string.
        """
        with open(wav_file, 'rb') as f:
            base64_bytes = base64.b64encode(f.read())
            base64_string = base64_bytes.decode('utf-8')
        return base64_string

    file_path = r"C:\Users\user\Desktop\zh.wav"
    b64_data = readwav2base64(file_path)
    a = lib.speech_recognize([b64_data])
    # a = lib.speech_recognize([b64_data])
    print(a)
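For context, the end goal is to call this module through PaddleHub serving after hub install and hub serving start -m ASR. A minimal client sketch follows; the module name, port, and payload layout are assumptions based on PaddleHub's default serving conventions, not something verified against this deployment:

# Hypothetical serving client; the endpoint and payload shape follow
# PaddleHub serving conventions (POST /predict/<module_name> with the
# @serving method's keyword arguments in the JSON body).
import base64
import json
import requests

with open(r"C:\Users\user\Desktop\zh.wav", 'rb') as f:
    b64_data = base64.b64encode(f.read()).decode('utf-8')

payload = {"audios": [b64_data]}
resp = requests.post(
    "http://127.0.0.1:8866/predict/ASR",
    headers={"Content-Type": "application/json"},
    data=json.dumps(payload),
)
print(resp.json())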
Run "main" error logs:
2024-01-11 15:31:57.691 | INFO | paddlespeech.s2t.modules.ctc:<module>:45 - paddlespeech_ctcdecoders not installed!
2024-01-11 15:31:57.803 | INFO | paddlespeech.s2t.modules.embedding:__init__:150 - max len: 5000
[2024-01-11 15:32:00,635] [ INFO] - Initialize ASR server engine successfully on device: gpu.
[2024-01-11 15:32:00] [CRITICAL] [transformation.py:149] Catch a exception from 0th func: LogMelSpectrogramKaldi(fs=16000, n_mels=80, n_frame_shift=10.0, n_frame_length=25.0, dither=0.1))
[2024-01-11 15:32:00,642] [ INFO] - When the type of 'input' in assign is numpy.ndarray, the data type of 'input' must be bool, float32, int32 or int64, but received int16.
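The last log line suggests the waveform handed to the feature pipeline is int16, which paddle.assign rejects. Below is a minimal standalone check of the decoded payload's dtype; the soundfile-based decoding is only my assumption about how the ASR executor reads the buffer, not taken from the paddlespeech source:

# Hypothetical dtype check, independent of module.py.
import io
import numpy as np
import soundfile as sf

with open(r"C:\Users\user\Desktop\zh.wav", 'rb') as f:
    raw = f.read()

samples, sr = sf.read(io.BytesIO(raw), dtype='int16')
print(samples.dtype, sr)                  # int16, 16000 -> the dtype the error complains about
print(samples.astype(np.float32).dtype)   # float32 is one of the dtypes assign accepts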
NOTE: