PiDTLN
Using multi-channel audio
Hi there,
The current code works great with a single-channel audio input, but when the channel is set to 2, the code throws this error:
Exception ignored from cffi callback <function _StreamBase.__init__.<locals>.callback_ptr at 0x7f73d4faf0>:
Traceback (most recent call last):
File "/home/pi/.local/lib/python3.9/site-packages/sounddevice.py", line 880, in callback_ptr
return _wrap_callback(
File "/home/pi/.local/lib/python3.9/site-packages/sounddevice.py", line 2681, in _wrap_callback
callback(*args)
File "/home/pi/PiDTLN/ns.py", line 128, in callback
indata = indata[:, [args.channel]]
IndexError: index 2 is out of bounds for axis 1 with size 2
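If I read this right, the IndexError itself is just NumPy's 0-based column indexing: with a 2-channel stream indata has shape (frames, 2), so the only valid column indices are 0 and 1, and indata[:, [2]] is out of range. A quick stand-alone illustration (my assumption here is that the channel argument is meant to pick one input channel, so the second channel would be index 1):

import numpy as np

# a fake 2-channel block, shaped like what the callback receives
indata = np.zeros((128, 2), dtype='float32')

print(indata[:, [0]].shape)   # (128, 1) -> first channel (index 0)
print(indata[:, [1]].shape)   # (128, 1) -> second channel (index 1)
# indata[:, [2]]              # IndexError: index 2 is out of bounds for axis 1 with size 2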
I modified the code to process the two channels separately, but I get an input overflow / output underflow issue because the way I modified it slows things down. Do you have a suggestion on how to solve this? The modified code is below:
# set some parameters
block_len_ms = 32
block_shift_ms = 8
fs_target = 16000
# create the interpreters
interpreter_1 = tflite.Interpreter(model_path='./models/dtln_ns_quant_1.tflite', num_threads=args.threads)
interpreter_1.allocate_tensors()
interpreter_2 = tflite.Interpreter(model_path='./models/dtln_ns_quant_2.tflite', num_threads=args.threads)
interpreter_2.allocate_tensors()
# Get input and output tensors.
input_details_1 = interpreter_1.get_input_details()
output_details_1 = interpreter_1.get_output_details()
input_details_2 = interpreter_2.get_input_details()
output_details_2 = interpreter_2.get_output_details()
# create states for the lstms
states_1_ch1 = np.zeros(input_details_1[1]['shape']).astype('float32')
states_2_ch1 = np.zeros(input_details_2[1]['shape']).astype('float32')
states_1_ch2 = np.zeros(input_details_1[1]['shape']).astype('float32')
states_2_ch2 = np.zeros(input_details_2[1]['shape']).astype('float32')
# calculate shift and length
block_shift = int(np.round(fs_target * (block_shift_ms / 1000)))
block_len = int(np.round(fs_target * (block_len_ms / 1000)))
# create buffer
in_buffer_ch1 = np.zeros((block_len)).astype('float32')
out_buffer_ch1 = np.zeros((block_len)).astype('float32')
in_buffer_ch2 = np.zeros((block_len)).astype('float32')
out_buffer_ch2 = np.zeros((block_len)).astype('float32')
g_use_fftw = True  # default; disabled below when no_fftw is set
if args.no_fftw:
    g_use_fftw = False
if g_use_fftw:
    fft_buf_ch1 = pyfftw.empty_aligned(512, dtype='float32')
    rfft_ch1 = pyfftw.builders.rfft(fft_buf_ch1, threads=args.threads)
    ifft_buf_ch1 = pyfftw.empty_aligned(257, dtype='complex64')
    irfft_ch1 = pyfftw.builders.irfft(ifft_buf_ch1, threads=args.threads)
    fft_buf_ch2 = pyfftw.empty_aligned(512, dtype='float32')
    rfft_ch2 = pyfftw.builders.rfft(fft_buf_ch2, threads=args.threads)
    ifft_buf_ch2 = pyfftw.empty_aligned(257, dtype='complex64')
    irfft_ch2 = pyfftw.builders.irfft(ifft_buf_ch2, threads=args.threads)
t_ring = collections.deque(maxlen=100)
def callback(indata, outdata, frames, buf_time, status):
    # buffer and states to global
    global in_buffer_ch1, in_buffer_ch2, out_buffer_ch1, out_buffer_ch2, states_1_ch1, states_2_ch1, states_1_ch2,\
        states_2_ch2, t_ring, g_use_fftw
    if args.measure:
        start_time = time.time()
    if status:
        print(status)
    # if args.channels is not None:
    #     indata = indata[:, [args.channels]]
    indata = indata[:, :args.channels]  # keep the first args.channels columns
    print(indata.shape)
    if args.no_denoise:
        outdata[:] = indata
        if args.measure:
            t_ring.append(time.time() - start_time)
        return
    # write to buffer
    in_buffer_ch1[:-block_shift] = in_buffer_ch1[block_shift:]
    in_buffer_ch1[-block_shift:] = np.squeeze(indata[:, 0])
    in_buffer_ch2[:-block_shift] = in_buffer_ch2[block_shift:]
    in_buffer_ch2[-block_shift:] = np.squeeze(indata[:, 1])
    # calculate fft of input block
    if g_use_fftw:
        fft_buf_ch1[:] = in_buffer_ch1
        in_block_fft_ch1 = rfft_ch1()
        fft_buf_ch2[:] = in_buffer_ch2
        in_block_fft_ch2 = rfft_ch2()
    else:
        in_block_fft_ch1 = np.fft.rfft(in_buffer_ch1)
        in_block_fft_ch2 = np.fft.rfft(in_buffer_ch2)
    in_mag_ch1 = np.abs(in_block_fft_ch1)
    in_phase_ch1 = np.angle(in_block_fft_ch1)
    in_mag_ch2 = np.abs(in_block_fft_ch2)
    in_phase_ch2 = np.angle(in_block_fft_ch2)
    # reshape magnitude to input dimensions
    in_mag_ch1 = np.reshape(in_mag_ch1, (1, 1, -1)).astype('float32')
    in_mag_ch2 = np.reshape(in_mag_ch2, (1, 1, -1)).astype('float32')
    # set tensors to the first model (channel 1)
    interpreter_1.set_tensor(input_details_1[1]['index'], states_1_ch1)
    interpreter_1.set_tensor(input_details_1[0]['index'], in_mag_ch1)
    # run calculation
    interpreter_1.invoke()
    # get the output of the first block
    out_mask_ch1 = interpreter_1.get_tensor(output_details_1[0]['index'])
    states_1_ch1 = interpreter_1.get_tensor(output_details_1[1]['index'])
    # calculate the ifft
    estimated_complex_ch1 = in_mag_ch1 * out_mask_ch1 * np.exp(1j * in_phase_ch1)
    if g_use_fftw:
        ifft_buf_ch1[:] = estimated_complex_ch1
        estimated_block_ch1 = irfft_ch1()
    else:
        estimated_block_ch1 = np.fft.irfft(estimated_complex_ch1)
    # reshape the time domain block
    estimated_block_ch1 = np.reshape(estimated_block_ch1, (1, 1, -1)).astype('float32')
    # set tensors to the first model (channel 2)
    interpreter_1.set_tensor(input_details_1[1]['index'], states_1_ch2)
    interpreter_1.set_tensor(input_details_1[0]['index'], in_mag_ch2)
    # run calculation
    interpreter_1.invoke()
    # get the output of the first block
    out_mask_ch2 = interpreter_1.get_tensor(output_details_1[0]['index'])
    states_1_ch2 = interpreter_1.get_tensor(output_details_1[1]['index'])
    # calculate the ifft
    estimated_complex_ch2 = in_mag_ch2 * out_mask_ch2 * np.exp(1j * in_phase_ch2)
    if g_use_fftw:
        ifft_buf_ch2[:] = estimated_complex_ch2
        estimated_block_ch2 = irfft_ch2()
    else:
        estimated_block_ch2 = np.fft.irfft(estimated_complex_ch2)
    # reshape the time domain block
    estimated_block_ch2 = np.reshape(estimated_block_ch2, (1, 1, -1)).astype('float32')
    # set tensors to the second model (channel 1)
    interpreter_2.set_tensor(input_details_2[1]['index'], states_2_ch1)
    interpreter_2.set_tensor(input_details_2[0]['index'], estimated_block_ch1)
    # run calculation
    interpreter_2.invoke()
    # get output tensors
    out_block_ch1 = interpreter_2.get_tensor(output_details_2[0]['index'])
    states_2_ch1 = interpreter_2.get_tensor(output_details_2[1]['index'])
    # write to buffer
    out_buffer_ch1[:-block_shift] = out_buffer_ch1[block_shift:]
    out_buffer_ch1[-block_shift:] = np.zeros((block_shift))
    out_buffer_ch1 += np.squeeze(out_block_ch1)
    # output to soundcard
    # outdata[:, 0] = np.expand_dims(out_buffer_ch1[:block_shift], axis=-1)
    outdata[:, 0] = out_buffer_ch1[:block_shift]
    # set tensors to the second model (channel 2)
    interpreter_2.set_tensor(input_details_2[1]['index'], states_2_ch2)
    interpreter_2.set_tensor(input_details_2[0]['index'], estimated_block_ch2)
    # run calculation
    interpreter_2.invoke()
    # get output tensors
    out_block_ch2 = interpreter_2.get_tensor(output_details_2[0]['index'])
    states_2_ch2 = interpreter_2.get_tensor(output_details_2[1]['index'])
    # write to buffer
    out_buffer_ch2[:-block_shift] = out_buffer_ch2[block_shift:]
    out_buffer_ch2[-block_shift:] = np.zeros((block_shift))
    out_buffer_ch2 += np.squeeze(out_block_ch2)
    # output to soundcard
    # outdata[:, 1] = np.expand_dims(out_buffer_ch2[:block_shift], axis=-1)
    outdata[:, 1] = out_buffer_ch2[:block_shift]
    # print(indata.shape)
    # print(outdata.shape)
    if args.measure:
        t_ring.append(time.time() - start_time)
def open_stream():
    with sd.Stream(device=(args.input_device, args.output_device), samplerate=fs_target, blocksize=block_shift,
                   dtype=np.float32, latency=args.latency, channels=args.channels, callback=callback):
        print('#' * 80)
        print('Ctrl-C to exit')
        print('#' * 80)
        if args.measure:
            while True:
                time.sleep(1)
                print('Processing time: {:.2f} ms'.format(1000 * np.average(t_ring)), end='\r')
        else:
            threading.Event().wait()
try:
    if args.daemonize:
        with daemon.DaemonContext():
            open_stream()
    else:
        open_stream()
except KeyboardInterrupt:
    parser.exit('')
except Exception as e:
    parser.exit(type(e).__name__ + ': ' + str(e))
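For context, here is my rough arithmetic on the timing budget, which is why I suspect the doubled per-callback work is what triggers the overflow/underflow:

fs_target = 16000
block_shift_ms = 8
block_shift = int(round(fs_target * block_shift_ms / 1000))   # 128 samples

# the callback fires once per hop, so both channels (four interpreter
# invocations plus the FFTs/IFFTs) have to finish within a single hop
budget_ms = 1000 * block_shift / fs_target
print(budget_ms)   # 8.0 ms

So if the single-channel processing time printed when measuring is enabled is already above roughly 4 ms, the two-channel version cannot fit into the 8 ms hop. One idea I have considered but not tried is giving each channel its own pair of interpreters and invoking them from separate threads, since as far as I know a single tflite Interpreter should not be invoked concurrently from two threads.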