opus icon indicating copy to clipboard operation
opus copied to clipboard

unsupported configuration mode: 2

Open Davincible opened this issue 1 year ago • 8 comments

Trying to decode Telegram voice messages, get unsupported configuration mode: 2

Sample file: https://file.io/LAv06s4gaw1o

Davincible avatar Mar 23 '23 15:03 Davincible

I have the same problem. The example file from this repo works fine, but a Telegram voice message does not.

I prepared an example, but the playground does not support loading files.

package main

import (
	"bytes"
	"errors"
	"github.com/pion/opus"
	"github.com/pion/opus/pkg/oggreader"
	"io"
	"log"
	"net/http"
)

// main downloads each sample Ogg file and runs it through OggRead, logging
// either the decode error or "<url> done" for every file.
//
// A failed HTTP request aborts the program with a panic; a non-200 response or
// a decode failure is logged and the loop moves on to the next file.
func main() {
	files := []string{
		"https://storage.googleapis.com/training-cloud-bucket/tiny.ogg",
		"https://storage.googleapis.com/training-cloud-bucket/telegram.ogg",
	}

	for _, file := range files {
		response, err := http.Get(file)
		if err != nil {
			panic(err)
		}

		// Do not hand an error page to the Ogg parser.
		if response.StatusCode != http.StatusOK {
			response.Body.Close()
			log.Printf("error: unexpected status %s for %s\n", response.Status, file)
			continue
		}

		err = OggRead(response.Body)
		// Close exactly once, right after use: a defer inside the loop would
		// keep every body open until main returns.
		response.Body.Close()
		if err != nil {
			log.Printf("error: %v\n", err)
			continue
		}

		log.Println(file, "done")
	}
}

// OggRead parses input as an Ogg container and passes every segment of every
// page to an Opus decoder, discarding the decoded audio.
//
// Pages whose first segment starts with "OpusTags" are skipped. It returns nil
// once the reader reports io.EOF, or the first error returned by the Ogg
// reader or the decoder.
func OggRead(input io.Reader) error {
	ogg, _, err := oggreader.NewWith(input)
	if err != nil {
		return err
	}

	out := make([]byte, 1920)
	decoder := opus.NewDecoder()

	for {
		segments, _, err := ogg.ParseNextPage()
		if errors.Is(err, io.EOF) {
			return nil
		}
		// Check the error before looking at segments: on failure the slice
		// may be empty, and indexing it would panic.
		if err != nil {
			return err
		}

		if len(segments) > 0 && bytes.HasPrefix(segments[0], []byte("OpusTags")) {
			continue
		}

		for i := range segments {
			if _, _, err = decoder.Decode(segments[i], out); err != nil {
				return err
			}
		}
	}
}

and I get the same problem:

2023/04/09 12:23:10 https://storage.googleapis.com/training-cloud-bucket/tiny.ogg done
2023/04/09 12:23:10 error: unsupported configuration mode: 3

from here https://github.com/pion/opus/blob/master/decoder.go#L42

DeryabinSergey avatar Apr 09 '23 10:04 DeryabinSergey

I managed to get the following to work to decode to wav

package audio

import (
	"bytes"
	"encoding/binary"
	"errors"
	"io"

	"gopkg.in/hraban/opus.v2"
)

// DecodeOpus decodes an in-memory Opus stream and returns the decoded samples
// serialized as 16-bit little-endian PCM bytes.
//
// It returns the error from opening the stream, or the first non-EOF error
// reported while reading from it.
func DecodeOpus(data []byte) ([]byte, error) {
	// Wrap the input data in a bytes buffer and open a stream on it.
	stream, err := opus.NewStream(bytes.NewBuffer(data))
	if err != nil {
		return nil, err
	}
	// Release the stream when decoding is finished; previously it was never
	// closed. The Close error is ignored because the samples are already read.
	defer stream.Close()

	// Scratch buffer filled by each Read call.
	pcm := make([]int16, 2880)

	// Read from the stream and decode until there is no more data.
	var output []int16
	for {
		n, err := stream.Read(pcm)
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return nil, err
		}
		if n == 0 {
			break
		}

		// NOTE(review): the library documents Read as returning the number of
		// samples per channel, so pcm[:n] is presumably only complete for mono
		// input; a stereo stream would need pcm[:n*2]. Confirm and add a
		// channel count if stereo files must be supported.
		output = append(output, pcm[:n]...)
	}

	// Serialize each sample as two little-endian bytes.
	out := make([]byte, len(output)*2)
	for i, v := range output {
		binary.LittleEndian.PutUint16(out[i*2:], uint16(v))
	}

	return out, nil
}

// PCMToWAV prepends a 44-byte RIFF/WAVE header to pcm and returns the result.
//
// The header declares format tag 1 with 16 bits per sample and derives the
// byte-rate and block-align fields from sampleRate and channels, assuming two
// bytes per sample. pcm is appended after the header unchanged.
func PCMToWAV(pcm []byte, sampleRate int, channels int) ([]byte, error) {
	const bytesPerSample = 2

	dataLen := len(pcm)
	le := binary.LittleEndian

	// Header fields, listed in the exact order they are written.
	header := []interface{}{
		// RIFF chunk descriptor.
		[]byte("RIFF"),
		uint32(dataLen + 36), // size of everything after this field
		[]byte("WAVE"),

		// Format chunk.
		[]byte("fmt "),
		uint32(16), // format chunk body length
		uint16(1),  // format tag
		uint16(channels),
		uint32(sampleRate),
		uint32(sampleRate * channels * bytesPerSample), // byte rate
		uint16(channels * bytesPerSample),              // block align
		uint16(16),                                     // bits per sample

		// Data chunk header.
		[]byte("data"),
		uint32(dataLen),
	}

	wav := new(bytes.Buffer)
	for _, field := range header {
		if err := binary.Write(wav, le, field); err != nil {
			return nil, err
		}
	}
	if _, err := wav.Write(pcm); err != nil {
		return nil, err
	}

	return wav.Bytes(), nil
}
		pcm, err := audio.DecodeOpus(voice)
		if err != nil {
			return "", fmt.Errorf("decode ogg: %w", err)
		}

		wav, err := audio.PCMToWAV(pcm, 48000, 2)
		if err != nil {
			return "", fmt.Errorf("encode wav: %w", err)
		}

Davincible avatar Apr 11 '23 11:04 Davincible

@Davincible thanks for the information. I saw and tried this library, but it includes C modules, and this solution did not compile for use in a Google Cloud Function.

DeryabinSergey avatar Apr 11 '23 11:04 DeryabinSergey

@DeryabinSergey yeah, pure Go would be better. I tried to make it work with libs but couldn't quickly get it working.

Davincible avatar Apr 12 '23 09:04 Davincible

@Davincible I think, but I am not sure, that voice messages from Telegram use CELT and, as described in the roadmap, it is not ready at this moment.

DeryabinSergey avatar Apr 12 '23 09:04 DeryabinSergey

I managed to get the following to work to decode to wav

package audio

import (
	"bytes"
	"encoding/binary"
	"errors"
	"io"

	"gopkg.in/hraban/opus.v2"
)

// DecodeOpus decodes an in-memory Opus stream and returns the decoded samples
// serialized as 16-bit little-endian PCM bytes.
//
// It returns the error from opening the stream, or the first non-EOF error
// reported while reading from it.
func DecodeOpus(data []byte) ([]byte, error) {
	// Wrap the input data in a bytes buffer and open a stream on it.
	stream, err := opus.NewStream(bytes.NewBuffer(data))
	if err != nil {
		return nil, err
	}
	// Release the stream when decoding is finished; previously it was never
	// closed. The Close error is ignored because the samples are already read.
	defer stream.Close()

	// Scratch buffer filled by each Read call.
	pcm := make([]int16, 2880)

	// Read from the stream and decode until there is no more data.
	var output []int16
	for {
		n, err := stream.Read(pcm)
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return nil, err
		}
		if n == 0 {
			break
		}

		// NOTE(review): the library documents Read as returning the number of
		// samples per channel, so pcm[:n] is presumably only complete for mono
		// input; a stereo stream would need pcm[:n*2]. Confirm and add a
		// channel count if stereo files must be supported.
		output = append(output, pcm[:n]...)
	}

	// Serialize each sample as two little-endian bytes.
	out := make([]byte, len(output)*2)
	for i, v := range output {
		binary.LittleEndian.PutUint16(out[i*2:], uint16(v))
	}

	return out, nil
}

// PCMToWAV prepends a 44-byte RIFF/WAVE header to pcm and returns the result.
//
// The header declares format tag 1 with 16 bits per sample and derives the
// byte-rate and block-align fields from sampleRate and channels, assuming two
// bytes per sample. pcm is appended after the header unchanged.
func PCMToWAV(pcm []byte, sampleRate int, channels int) ([]byte, error) {
	const bytesPerSample = 2

	dataLen := len(pcm)
	le := binary.LittleEndian

	// Header fields, listed in the exact order they are written.
	header := []interface{}{
		// RIFF chunk descriptor.
		[]byte("RIFF"),
		uint32(dataLen + 36), // size of everything after this field
		[]byte("WAVE"),

		// Format chunk.
		[]byte("fmt "),
		uint32(16), // format chunk body length
		uint16(1),  // format tag
		uint16(channels),
		uint32(sampleRate),
		uint32(sampleRate * channels * bytesPerSample), // byte rate
		uint16(channels * bytesPerSample),              // block align
		uint16(16),                                     // bits per sample

		// Data chunk header.
		[]byte("data"),
		uint32(dataLen),
	}

	wav := new(bytes.Buffer)
	for _, field := range header {
		if err := binary.Write(wav, le, field); err != nil {
			return nil, err
		}
	}
	if _, err := wav.Write(pcm); err != nil {
		return nil, err
	}

	return wav.Bytes(), nil
}
		pcm, err := audio.DecodeOpus(voice)
		if err != nil {
			return "", fmt.Errorf("decode ogg: %w", err)
		}

		wav, err := audio.PCMToWAV(pcm, 48000, 2)
		if err != nil {
			return "", fmt.Errorf("encode wav: %w", err)
		}

It works, but the WAV file produced by the example above from an Opus file is not what I expected: it sounds abnormal and it is bigger than the file decoded with opusdec: opusdec --rate 16000 ./files/pcm-stereo.opus ./files/pcm-stereo.opus.wav

These are the files I got:

-rw-r--r-- 1 root root 5.7M Apr 19 08:30 pcm-stereo-bygolib.opus.wav
-rw-r--r-- 1 root root 372K Apr 19 08:28 pcm-stereo.opus
-rw-r--r-- 1 root root 3.8M Apr 19 08:30 pcm-stereo.opus.wav

the code

        // Read the Opus file; fail loudly instead of decoding an empty slice
        // when the file cannot be read.
        voice, err := os.ReadFile("./files/pcm-stereo.opus")
        if err != nil {
                panic(err)
        }
        pcm, err := DecodeOpus(voice)
        if err != nil {
                panic(err)
        }

        // NOTE(review): PCMToWAV only writes the rate into the header and does
        // not resample. Presumably DecodeOpus yields 48 kHz samples, in which
        // case declaring 16000 here makes playback too slow. Confirm.
        wav, err := PCMToWAV(pcm, 16000, 2)
        if err != nil {
                panic(err)
        }
        // Report a failed write rather than silently producing no output file.
        if err := os.WriteFile("./files/pcm-stereo-bygolib.opus.wav", wav, 0666); err != nil {
                panic(err)
        }

Any suggestions on how to tackle this issue?

files.zip

linnv avatar Apr 19 '24 08:04 linnv

Here is what I found in the source code of opusdec: https://github.com/xiph/opus-tools/blob/master/src/opusdec.c#L516 — resampling may work.

linnv avatar Apr 20 '24 02:04 linnv

The library doesn't seem to support CELT coding yet. https://github.com/pion/opus/blob/867e82f700140532b924bb6fc94523ec9cd412eb/decoder.go#L45

It's on the roadmap, but until then, only SILK coding is supported.

xypwn avatar May 14 '24 10:05 xypwn