elif args.algo == "ogive":
    # Run OGIVE (the call header was cut off in this fragment; reconstructed
    # here, assuming an ogive() routine taking these keyword arguments)
    Y = ogive(
        X,
        n_iter=ogive_iter,
        step_size=ogive_mu,
        update=ogive_update,
        proj_back=True,
        init_eig=(args.init == init_choices[1]),
        callback=convergence_callback,
    )
else:
    raise ValueError("No such algorithm {}".format(args.algo))
toc = time.perf_counter()
print("Processing time: {} s".format(toc - tic))
# Run iSTFT
if Y.shape[2] == 1:
    y = pra.transform.synthesis(Y[:, :, 0], framesize, framesize // 2, win=win_s)[
        :, None
    ]
    y = y.astype(np.float64)
else:
    y = pra.transform.synthesis(Y, framesize, framesize // 2, win=win_s).astype(
        np.float64
    )
# If any of the outputs is uniformly zero, add a bit of noise so that the
# comparison below does not fail
for k in range(y.shape[1]):
    if np.sum(np.abs(y[:, k])) < 1e-10:
        y[:, k] = np.random.randn(y.shape[0]) * 1e-10
# For conventional BSS methods, reorder the signals by decreasing power
if args.algo != "blinkiva":
    new_ord = np.argsort(np.std(y, axis=0))[::-1]
    y = y[:, new_ord]  # apply the ordering (completes the truncated fragment)
signal_fp = os.path.join(os.path.dirname(__file__), 'input_samples',
                         'cmu_arctic_us_aew_a0001.wav')
noise_fp = os.path.join(os.path.dirname(__file__), 'input_samples',
                        'doing_the_dishes.wav')
noisy_signal, signal, noise, fs = pra.create_noisy_signal(signal_fp,
                                                          snr=snr,
                                                          noise_fp=noise_fp)
wavfile.write(os.path.join(os.path.dirname(__file__), 'output_samples',
                           'denoise_input_SpectralSub.wav'), fs,
              noisy_signal.astype(np.float32))
"""
Create STFT and SCNR objects
"""
hop = nfft // 2
window = pra.hann(nfft, flag='asymmetric', length='full')
stft = pra.transform.STFT(nfft, hop=hop, analysis_window=window,
                          streaming=True)
scnr = SpectralSub(nfft, db_reduc, lookback, beta, alpha)
lookback_time = hop / fs * lookback
print("Lookback : %f seconds" % lookback_time)
"""
Process as in real-time
"""
# collect the processed blocks
processed_audio = np.zeros(signal.shape)
n = 0
while noisy_signal.shape[0] - n >= hop:
    # SCNR in frequency domain
    stft.analysis(noisy_signal[n:n + hop])
    # the loop body below completes the truncated fragment, following the
    # pyroomacoustics spectral subtraction example
    gain_filt = scnr.compute_gain_filter(stft.X)
    processed_audio[n:n + hop] = stft.synthesis(gain_filt * stft.X)
    n += hop
# fix the randomness for repeatability
np.random.seed(10)
# set the source powers, the first one is half
source_std = np.ones(n_sources_target)
source_std[0] /= np.sqrt(2.0)
SIR = 10  # dB
# SNR with respect to a single target source and microphone self-noise
SNR = 60  # dB
# STFT parameters
framesize = 4096
win_a = pra.hann(framesize)
win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)
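# A quick round-trip check (sketch, not part of the original script):
#     test = np.random.randn(16 * framesize)
#     T = pra.transform.analysis(test, framesize, framesize // 2, win=win_a)
#     rec = pra.transform.synthesis(T, framesize, framesize // 2, win=win_s)
# with this matched window pair at 50% overlap, rec reproduces test up to a
# startup offset and edge effects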
# algorithm parameters
n_iter = args.n_iter
# param ogive
ogive_mu = 0.1
ogive_update = "switching"
ogive_iter = 2000
# Geometry of the room and location of sources and microphones
room_dim = np.array([10, 7.5, 3])
mic_locs = semi_circle_layout(
    [4.1, 3.76, 1.2], np.pi, 0.04, n_mics, rot=np.pi / 2.0 * 0.99
)
target_locs = semi_circle_layout(
parser.add_argument('--save', action='store_true',
                    help='Saves the output of the separation to wav files')
args = parser.parse_args()
if args.gui:
    # avoids a bug with tkinter and matplotlib
    import matplotlib
    matplotlib.use('TkAgg')
import pyroomacoustics as pra
## Prepare one-shot STFT
L = args.block
hop = L // 2
win_a = pra.hann(L)
win_s = pra.transform.compute_synthesis_window(win_a, hop)
## Create a room with sources and mics
# Room dimensions in meters
room_dim = [8, 9]
# source location
source = np.array([1, 4.5])
room = pra.ShoeBox(
    room_dim,
    fs=16000,
    max_order=15,
    absorption=0.35,
    sigma2_awgn=1e-8,
)
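# max_order bounds the image-source reflection order, absorption is the wall
# energy absorption coefficient, and sigma2_awgn adds white sensor noise of
# that variance at the microphones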
# get signals (the comprehension was truncated; the completion assumes a
# wav_files list of per-source file groups, as in the pyroomacoustics
# BSS example)
signals = [
    np.concatenate([wavfile.read(f)[1].astype(np.float32) for f in source_files])
    for source_files in wav_files
]
def convergence_callback(Y):
    global SDR, SIR
    from mir_eval.separation import bss_eval_sources
    ref = np.moveaxis(separate_recordings, 1, 2)
    y = pra.transform.synthesis(Y, L, hop, win=win_s)
    y = y[L - hop:, :].T
    m = np.minimum(y.shape[1], ref.shape[1])
    sdr, sir, sar, perm = bss_eval_sources(ref[:, :m, 0], y[:, :m])
    SDR.append(sdr)
    SIR.append(sir)
# create window functions
analysis_win = windows.hann(self.L)
# perform STFT
sig_stft = transform.analysis(self.signals.T,
                              L=self.L,
                              hop=self.hop,
                              win=analysis_win,
                              zp_back=self.zpb,
                              zp_front=self.zpf)
# beamform
sig_stft_bf = np.sum(sig_stft * self.weights.conj().T, axis=2)
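# sig_stft has shape (frames, bins, channels); assuming weights of shape
# (channels, bins), weights.conj().T broadcasts one complex weight per bin
# and channel over all frames, and summing over the channel axis yields the
# beamformed STFT of shape (frames, bins)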
# back to time domain
output = transform.synthesis(sig_stft_bf,
                             L=self.L,
                             hop=self.hop,
                             zp_back=self.zpb,
                             zp_front=self.zpf)
# remove the zero padding from output signal
if self.zpb == 0:  # `is 0` compared identity, not value
    output = output[self.zpf:]
else:
    output = output[self.zpf:-self.zpb]
else:
    # TD processing
    if self.weights is not None and self.filters is None:
Length of filter in time domain = fft_length / fs * num_taps
"""
# the unknown filters in the frequency domain
num_bands = fft_length // 2 + 1
W = np.random.randn(num_taps, num_bands) + \
    1j * np.random.randn(num_taps, num_bands)
W /= np.linalg.norm(W, axis=0)
# create a known driving signal
x = np.random.randn(n_samples)
# take to STFT domain
window = pra.hann(fft_length) # the analysis window
hop = fft_length//2
stft_in = pra.transform.STFT(fft_length, hop=hop,
                             analysis_window=window, channels=1)
stft_out = pra.transform.STFT(fft_length, hop=hop,
                              analysis_window=window, channels=1)
n = 0
num_blocks = 0
X_concat = np.zeros((num_bands, n_samples // hop), dtype=np.complex64)
while n_samples - n > hop:
    stft_in.analysis(x[n:n + hop])
    X_concat[:, num_blocks] = stft_in.X
    n += hop
    num_blocks += 1
# convolve in frequency domain with unknown filter
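# A minimal sketch of that step (an assumption, not the original code): each
# of the num_bands bands is filtered across frames by its own num_taps-long
# FIR taken from the columns of W
Y_concat = np.zeros_like(X_concat)
for m in range(num_taps - 1, num_blocks):
    # the num_taps most recent frames of every band, newest first
    frames = X_concat[:, m - num_taps + 1:m + 1][:, ::-1]
    Y_concat[:, m] = np.sum(W.T * frames, axis=1)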
elif bss_type == 'fastmnmf':
    # Run FastMNMF
    Y = pra.bss.fastmnmf(X, n_iter=args.n_iter, n_components=8, n_src=2,
                         callback=cb_print)
elif bss_type == 'sparseauxiva':
    # Estimate set of active frequency bins
    ratio = 0.35
    average = np.abs(np.mean(np.mean(X, axis=2), axis=0))
    k = np.int_(average.shape[0] * ratio)
    S = np.sort(np.argpartition(average, -k)[-k:])
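    # The three lines above average the STFT magnitude over channels and
    # frames, keep the 35% most energetic bins via argpartition, and sort
    # their indices; sparse AuxIVA then demixes only those bins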
    # Run SparseAuxIva
    Y = pra.bss.sparseauxiva(X, S, n_iter=30, proj_back=True,
                             callback=cb_print)
## STFT Synthesis
y = pra.transform.synthesis(Y, L, L, zp_back=L // 2, zp_front=L // 2).T
## GUI starts
print('* Start GUI')
class PlaySoundGUI(object):
    def __init__(self, master, fs, mix, sources):
        self.master = master
        self.fs = fs
        self.mix = mix
        self.sources = sources
        master.title("A simple GUI")

        self.label = Label(master, text="This is our first GUI!")
        self.label.pack()

        self.mix_button = Button(master, text='Mix', command=lambda: self.play(self.mix))
        self.mix_button.pack()