diff --git a/.github/scripts/unittest-linux/run_test.sh b/.github/scripts/unittest-linux/run_test.sh index 611e456f37..59bb4e39f3 100755 --- a/.github/scripts/unittest-linux/run_test.sh +++ b/.github/scripts/unittest-linux/run_test.sh @@ -33,6 +33,8 @@ fi export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_unidecode=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_inflect=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_pytorch_lightning=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_demucs=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_fairseq=true cd test - pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not ffmpeg and not fairseq and not hdemucs and not (torchscript and rnnt) and not torchscript_consistency" + pytest torchaudio_unittest ) diff --git a/test/torchaudio_unittest/functional/torchscript_consistency_cpu_test.py b/test/torchaudio_unittest/functional/torchscript_consistency_cpu_test.py deleted file mode 100644 index 514b9bc063..0000000000 --- a/test/torchaudio_unittest/functional/torchscript_consistency_cpu_test.py +++ /dev/null @@ -1,14 +0,0 @@ -import torch -from torchaudio_unittest.common_utils import PytorchTestCase - -from .torchscript_consistency_impl import Functional, FunctionalFloat32Only - - -class TestFunctionalFloat32(Functional, FunctionalFloat32Only, PytorchTestCase): - dtype = torch.float32 - device = torch.device("cpu") - - -class TestFunctionalFloat64(Functional, PytorchTestCase): - dtype = torch.float64 - device = torch.device("cpu") diff --git a/test/torchaudio_unittest/functional/torchscript_consistency_cuda_test.py b/test/torchaudio_unittest/functional/torchscript_consistency_cuda_test.py deleted file mode 100644 index b389eb6cbd..0000000000 --- a/test/torchaudio_unittest/functional/torchscript_consistency_cuda_test.py +++ /dev/null @@ -1,16 +0,0 @@ -import torch -from torchaudio_unittest.common_utils import PytorchTestCase, skipIfNoCuda - -from .torchscript_consistency_impl import Functional, FunctionalFloat32Only - - -@skipIfNoCuda -class TestFunctionalFloat32(Functional, FunctionalFloat32Only, PytorchTestCase): - dtype = torch.float32 - device = torch.device("cuda") - - -@skipIfNoCuda -class TestFunctionalFloat64(Functional, PytorchTestCase): - dtype = torch.float64 - device = torch.device("cuda") diff --git a/test/torchaudio_unittest/functional/torchscript_consistency_impl.py b/test/torchaudio_unittest/functional/torchscript_consistency_impl.py deleted file mode 100644 index 099b370086..0000000000 --- a/test/torchaudio_unittest/functional/torchscript_consistency_impl.py +++ /dev/null @@ -1,822 +0,0 @@ -"""Test suites for jit-ability and its numerical compatibility""" -import unittest - -import torch -import torchaudio.functional as F -from parameterized import parameterized -from torchaudio_unittest import common_utils -from torchaudio_unittest.common_utils import skipIfRocm, TempDirMixin, TestBaseMixin, torch_script -from torchaudio.functional.functional import rnnt_loss - - - -class Functional(TempDirMixin, TestBaseMixin): - """Implements test for `functional` module that are performed for different devices""" - - def _assert_consistency(self, func, inputs, shape_only=False): - inputs_ = [] - for i in inputs: - if torch.is_tensor(i): - i = i.to(device=self.device, dtype=self.dtype) - inputs_.append(i) - ts_func = torch_script(func) - - torch.random.manual_seed(40) - output = func(*inputs_) - - torch.random.manual_seed(40) - ts_output = ts_func(*inputs_) - - if shape_only: - ts_output = ts_output.shape - output = output.shape - self.assertEqual(ts_output, output) - - def _assert_consistency_complex(self, func, inputs): - inputs_ = [] - for i in inputs: - if torch.is_tensor(i): - i = i.to(dtype=self.complex_dtype if i.is_complex() else self.dtype, device=self.device) - inputs_.append(i) - ts_func = torch_script(func) - - torch.random.manual_seed(40) - output = func(*inputs_) - - torch.random.manual_seed(40) - ts_output = ts_func(*inputs_) - - self.assertEqual(ts_output, output) - - @parameterized.expand( - [ - (True,), - (False,), - ("window",), - ("frame_length",), - ] - ) - def test_spectrogram(self, normalize): - waveform = common_utils.get_whitenoise() - n_fft = 400 - ws = 400 - hop = 200 - pad = 0 - window = torch.hann_window(ws, device=waveform.device, dtype=waveform.dtype) - power = None - self._assert_consistency( - F.spectrogram, (waveform, pad, window, n_fft, hop, ws, power, normalize, True, "reflect", True, True) - ) - - @parameterized.expand( - [ - (True,), - (False,), - ("window",), - ("frame_length",), - ] - ) - def test_inverse_spectrogram(self, normalize): - waveform = common_utils.get_whitenoise(sample_rate=8000, duration=0.05) - specgram = common_utils.get_spectrogram(waveform, n_fft=400, hop_length=200) - length = 400 - n_fft = 400 - hop = 200 - ws = 400 - pad = 0 - window = torch.hann_window(ws, device=specgram.device, dtype=torch.float64) - self._assert_consistency_complex( - F.inverse_spectrogram, (specgram, length, pad, window, n_fft, hop, ws, normalize, True, "reflect", True) - ) - - @skipIfRocm - def test_griffinlim(self): - tensor = torch.rand((1, 201, 6)) - n_fft = 400 - ws = 400 - hop = 200 - window = torch.hann_window(ws, device=tensor.device, dtype=tensor.dtype) - power = 2.0 - momentum = 0.99 - n_iter = 32 - length = 1000 - rand_int = False - self._assert_consistency( - F.griffinlim, (tensor, window, n_fft, hop, ws, power, n_iter, momentum, length, rand_int) - ) - - def test_compute_deltas(self): - channel = 13 - n_mfcc = channel * 3 - time = 1021 - tensor = torch.randn(channel, n_mfcc, time) - win_length = 2 * 7 + 1 - self._assert_consistency(F.compute_deltas, (tensor, win_length, "replicate")) - - def test_detect_pitch_frequency(self): - waveform = common_utils.get_sinusoid(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - return F.detect_pitch_frequency(tensor, sample_rate) - - self._assert_consistency(func, (waveform,)) - - def test_measure_loudness(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - sample_rate = 44100 - waveform = common_utils.get_sinusoid(sample_rate=sample_rate, device=self.device) - self._assert_consistency(F.loudness, (waveform, sample_rate)) - - def test_melscale_fbanks(self): - if self.device != torch.device("cpu"): - raise unittest.SkipTest("No need to perform test on device other than CPU") - - n_stft = 100 - f_min = 0.0 - f_max = 20.0 - n_mels = 10 - sample_rate = 16000 - norm = "slaney" - self._assert_consistency(F.melscale_fbanks, (n_stft, f_min, f_max, n_mels, sample_rate, norm, "htk")) - - def test_linear_fbanks(self): - if self.device != torch.device("cpu"): - raise unittest.SkipTest("No need to perform test on device other than CPU") - - n_stft = 100 - f_min = 0.0 - f_max = 20.0 - n_filter = 10 - sample_rate = 16000 - self._assert_consistency(F.linear_fbanks, (n_stft, f_min, f_max, n_filter, sample_rate)) - - def test_amplitude_to_DB(self): - tensor = torch.rand((6, 201)) - multiplier = 10.0 - amin = 1e-10 - db_multiplier = 0.0 - top_db = 80.0 - self._assert_consistency(F.amplitude_to_DB, (tensor, multiplier, amin, db_multiplier, top_db)) - - def test_DB_to_amplitude(self): - tensor = torch.rand((1, 100)) - ref = 1.0 - power = 1.0 - self._assert_consistency(F.DB_to_amplitude, (tensor, ref, power)) - - def test_create_dct(self): - if self.device != torch.device("cpu"): - raise unittest.SkipTest("No need to perform test on device other than CPU") - - n_mfcc = 40 - n_mels = 128 - norm = "ortho" - self._assert_consistency(F.create_dct, (n_mfcc, n_mels, norm)) - - def test_mu_law_encoding(self): - def func(tensor): - qc = 256 - return F.mu_law_encoding(tensor, qc) - - waveform = common_utils.get_whitenoise() - self._assert_consistency(func, (waveform,)) - - def test_mu_law_decoding(self): - def func(tensor): - qc = 256 - return F.mu_law_decoding(tensor, qc) - - tensor = torch.rand((1, 10)) - self._assert_consistency(func, (tensor,)) - - def test_mask_along_axis(self): - def func(tensor): - mask_param = 100 - mask_value = 30.0 - axis = 2 - return F.mask_along_axis(tensor, mask_param, mask_value, axis) - - tensor = torch.randn(2, 1025, 400) - self._assert_consistency(func, (tensor,)) - - def test_mask_along_axis_iid(self): - def func(tensor): - mask_param = 100 - mask_value = 30.0 - axis = 2 - return F.mask_along_axis_iid(tensor, mask_param, mask_value, axis) - - tensor = torch.randn(4, 2, 1025, 400) - self._assert_consistency(func, (tensor,)) - - def test_gain(self): - def func(tensor): - gainDB = 2.0 - return F.gain(tensor, gainDB) - - tensor = torch.rand((1, 1000)) - self._assert_consistency(func, (tensor,)) - - def test_dither_TPDF(self): - def func(tensor): - return F.dither(tensor, "TPDF") - - tensor = common_utils.get_whitenoise(n_channels=2) - self._assert_consistency(func, (tensor,), shape_only=True) - - def test_dither_RPDF(self): - def func(tensor): - return F.dither(tensor, "RPDF") - - tensor = common_utils.get_whitenoise(n_channels=2) - self._assert_consistency(func, (tensor,), shape_only=True) - - def test_dither_GPDF(self): - def func(tensor): - return F.dither(tensor, "GPDF") - - tensor = common_utils.get_whitenoise(n_channels=2) - self._assert_consistency(func, (tensor,), shape_only=True) - - def test_dither_noise_shaping(self): - def func(tensor): - return F.dither(tensor, noise_shaping=True) - - tensor = common_utils.get_whitenoise(n_channels=2) - self._assert_consistency(func, (tensor,)) - - def test_lfilter(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise() - # Design an IIR lowpass filter using scipy.signal filter design - # https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.iirdesign.html#scipy.signal.iirdesign - # - # Example - # >>> from scipy.signal import iirdesign - # >>> b, a = iirdesign(0.2, 0.3, 1, 60) - b_coeffs = torch.tensor( - [ - 0.00299893, - -0.0051152, - 0.00841964, - -0.00747802, - 0.00841964, - -0.0051152, - 0.00299893, - ], - device=waveform.device, - dtype=waveform.dtype, - ) - a_coeffs = torch.tensor( - [ - 1.0, - -4.8155751, - 10.2217618, - -12.14481273, - 8.49018171, - -3.3066882, - 0.56088705, - ], - device=waveform.device, - dtype=waveform.dtype, - ) - self._assert_consistency(F.lfilter, (waveform, a_coeffs, b_coeffs, True, True)) - - def test_filtfilt(self): - waveform = common_utils.get_whitenoise(sample_rate=8000) - b_coeffs = torch.rand(4, device=waveform.device, dtype=waveform.dtype) - a_coeffs = torch.rand(4, device=waveform.device, dtype=waveform.dtype) - self._assert_consistency(F.filtfilt, (waveform, a_coeffs, b_coeffs, True)) - - def test_lowpass(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - cutoff_freq = 3000.0 - return F.lowpass_biquad(tensor, sample_rate, cutoff_freq) - - self._assert_consistency(func, (waveform,)) - - def test_highpass(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - cutoff_freq = 2000.0 - return F.highpass_biquad(tensor, sample_rate, cutoff_freq) - - self._assert_consistency(func, (waveform,)) - - def test_allpass(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - central_freq = 1000.0 - q = 0.707 - return F.allpass_biquad(tensor, sample_rate, central_freq, q) - - self._assert_consistency(func, (waveform,)) - - def test_bandpass_with_csg(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - central_freq = 1000.0 - q = 0.707 - const_skirt_gain = True - return F.bandpass_biquad(tensor, sample_rate, central_freq, q, const_skirt_gain) - - self._assert_consistency(func, (waveform,)) - - def test_bandpass_without_csg(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - central_freq = 1000.0 - q = 0.707 - const_skirt_gain = True - return F.bandpass_biquad(tensor, sample_rate, central_freq, q, const_skirt_gain) - - self._assert_consistency(func, (waveform,)) - - def test_bandreject(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - central_freq = 1000.0 - q = 0.707 - return F.bandreject_biquad(tensor, sample_rate, central_freq, q) - - self._assert_consistency(func, (waveform,)) - - def test_band_with_noise(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - central_freq = 1000.0 - q = 0.707 - noise = True - return F.band_biquad(tensor, sample_rate, central_freq, q, noise) - - self._assert_consistency(func, (waveform,)) - - def test_band_without_noise(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - central_freq = 1000.0 - q = 0.707 - noise = False - return F.band_biquad(tensor, sample_rate, central_freq, q, noise) - - self._assert_consistency(func, (waveform,)) - - def test_treble(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - gain = 40.0 - central_freq = 1000.0 - q = 0.707 - return F.treble_biquad(tensor, sample_rate, gain, central_freq, q) - - self._assert_consistency(func, (waveform,)) - - def test_bass(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - gain = 40.0 - central_freq = 1000.0 - q = 0.707 - return F.bass_biquad(tensor, sample_rate, gain, central_freq, q) - - self._assert_consistency(func, (waveform,)) - - def test_deemph(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - return F.deemph_biquad(tensor, sample_rate) - - self._assert_consistency(func, (waveform,)) - - def test_riaa(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - return F.riaa_biquad(tensor, sample_rate) - - self._assert_consistency(func, (waveform,)) - - def test_equalizer(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - sample_rate = 44100 - center_freq = 300.0 - gain = 1.0 - q = 0.707 - return F.equalizer_biquad(tensor, sample_rate, center_freq, gain, q) - - self._assert_consistency(func, (waveform,)) - - def test_perf_biquad_filtering(self): - if self.dtype == torch.float64: - raise unittest.SkipTest("This test is known to fail for float64") - - waveform = common_utils.get_whitenoise() - - def func(tensor): - a = torch.tensor([0.7, 0.2, 0.6], device=tensor.device, dtype=tensor.dtype) - b = torch.tensor([0.4, 0.2, 0.9], device=tensor.device, dtype=tensor.dtype) - return F.lfilter(tensor, a, b) - - self._assert_consistency(func, (waveform,)) - - def test_sliding_window_cmn(self): - def func(tensor): - cmn_window = 600 - min_cmn_window = 100 - center = False - norm_vars = False - a = torch.tensor( - [[-1.915875792503357, 1.147700309753418], [1.8242558240890503, 1.3869990110397339]], - device=tensor.device, - dtype=tensor.dtype, - ) - return F.sliding_window_cmn(a, cmn_window, min_cmn_window, center, norm_vars) - - b = torch.tensor([[-1.8701, -0.1196], [1.8701, 0.1196]]) - self._assert_consistency(func, (b,)) - - def test_contrast(self): - waveform = common_utils.get_whitenoise() - - def func(tensor): - enhancement_amount = 80.0 - return F.contrast(tensor, enhancement_amount) - - self._assert_consistency(func, (waveform,)) - - def test_dcshift(self): - waveform = common_utils.get_whitenoise() - - def func(tensor): - shift = 0.5 - limiter_gain = 0.05 - return F.dcshift(tensor, shift, limiter_gain) - - self._assert_consistency(func, (waveform,)) - - def test_overdrive(self): - waveform = common_utils.get_whitenoise() - - def func(tensor): - gain = 30.0 - colour = 50.0 - return F.overdrive(tensor, gain, colour) - - self._assert_consistency(func, (waveform,)) - - def test_phaser(self): - waveform = common_utils.get_whitenoise(sample_rate=44100) - - def func(tensor): - gain_in = 0.5 - gain_out = 0.8 - delay_ms = 2.0 - decay = 0.4 - speed = 0.5 - sample_rate = 44100 - return F.phaser(tensor, sample_rate, gain_in, gain_out, delay_ms, decay, speed, sinusoidal=True) - - self._assert_consistency(func, (waveform,)) - - def test_flanger(self): - waveform = torch.rand(2, 100) - 0.5 - - def func(tensor): - delay = 0.8 - depth = 0.88 - regen = 3.0 - width = 0.23 - speed = 1.3 - phase = 60.0 - sample_rate = 44100 - return F.flanger( - tensor, - sample_rate, - delay, - depth, - regen, - width, - speed, - phase, - modulation="sinusoidal", - interpolation="linear", - ) - - self._assert_consistency(func, (waveform,)) - - def test_spectral_centroid(self): - def func(tensor): - sample_rate = 44100 - n_fft = 400 - ws = 400 - hop = 200 - pad = 0 - window = torch.hann_window(ws, device=tensor.device, dtype=tensor.dtype) - return F.spectral_centroid(tensor, sample_rate, pad, window, n_fft, hop, ws) - - tensor = common_utils.get_whitenoise(sample_rate=44100) - self._assert_consistency(func, (tensor,)) - - def test_resample_sinc(self): - def func(tensor): - sr1, sr2 = 16000, 8000 - return F.resample(tensor, sr1, sr2, resampling_method="sinc_interp_hann") - - tensor = common_utils.get_whitenoise(sample_rate=16000) - self._assert_consistency(func, (tensor,)) - - @parameterized.expand( - [ - (None,), - (6.0,), - ] - ) - def test_resample_kaiser(self, beta): - tensor = common_utils.get_whitenoise(sample_rate=16000) - sr1, sr2 = 16000, 8000 - lowpass_filter_width = 6 - rolloff = 0.99 - self._assert_consistency( - F.resample, (tensor, sr1, sr2, lowpass_filter_width, rolloff, "sinc_interp_kaiser", beta) - ) - - def test_phase_vocoder(self): - tensor = torch.view_as_complex(torch.randn(2, 1025, 400, 2)) - n_freq = tensor.size(-2) - rate = 0.5 - hop_length = 256 - phase_advance = torch.linspace( - 0, - 3.14 * hop_length, - n_freq, - dtype=torch.real(tensor).dtype, - device=tensor.device, - )[..., None] - self._assert_consistency_complex(F.phase_vocoder, (tensor, rate, phase_advance)) - - def test_psd(self): - batch_size = 2 - channel = 4 - n_fft_bin = 10 - frame = 10 - normalize = True - eps = 1e-10 - tensor = torch.rand(batch_size, channel, n_fft_bin, frame, dtype=self.complex_dtype) - self._assert_consistency_complex(F.psd, (tensor, None, normalize, eps)) - - def test_psd_with_mask(self): - batch_size = 2 - channel = 4 - n_fft_bin = 10 - frame = 10 - normalize = True - eps = 1e-10 - specgram = torch.rand(batch_size, channel, n_fft_bin, frame, dtype=self.complex_dtype) - mask = torch.rand(batch_size, n_fft_bin, frame, device=self.device) - self._assert_consistency_complex(F.psd, (specgram, mask, normalize, eps)) - - def test_mvdr_weights_souden(self): - channel = 4 - n_fft_bin = 10 - diagonal_loading = True - diag_eps = 1e-7 - eps = 1e-8 - psd_speech = torch.rand(n_fft_bin, channel, channel, dtype=torch.cfloat) - psd_noise = torch.rand(n_fft_bin, channel, channel, dtype=torch.cfloat) - self._assert_consistency_complex( - F.mvdr_weights_souden, (psd_speech, psd_noise, 0, diagonal_loading, diag_eps, eps) - ) - - def test_mvdr_weights_souden_with_tensor(self): - channel = 4 - n_fft_bin = 10 - diagonal_loading = True - diag_eps = 1e-7 - eps = 1e-8 - psd_speech = torch.rand(n_fft_bin, channel, channel, dtype=torch.cfloat) - psd_noise = torch.rand(n_fft_bin, channel, channel, dtype=torch.cfloat) - reference_channel = torch.zeros(channel) - reference_channel[..., 0].fill_(1) - self._assert_consistency_complex( - F.mvdr_weights_souden, (psd_speech, psd_noise, reference_channel, diagonal_loading, diag_eps, eps) - ) - - def test_mvdr_weights_rtf(self): - channel = 4 - n_fft_bin = 10 - diagonal_loading = True - diag_eps = 1e-7 - eps = 1e-8 - rtf = torch.rand(n_fft_bin, channel, dtype=self.complex_dtype) - psd_noise = torch.rand(n_fft_bin, channel, channel, dtype=self.complex_dtype) - reference_channel = 0 - self._assert_consistency_complex( - F.mvdr_weights_rtf, (rtf, psd_noise, reference_channel, diagonal_loading, diag_eps, eps) - ) - - def test_mvdr_weights_rtf_with_tensor(self): - channel = 4 - n_fft_bin = 10 - diagonal_loading = True - diag_eps = 1e-7 - eps = 1e-8 - rtf = torch.rand(n_fft_bin, channel, dtype=self.complex_dtype) - psd_noise = torch.rand(n_fft_bin, channel, channel, dtype=self.complex_dtype) - reference_channel = torch.zeros(channel) - reference_channel[..., 0].fill_(1) - self._assert_consistency_complex( - F.mvdr_weights_rtf, (rtf, psd_noise, reference_channel, diagonal_loading, diag_eps, eps) - ) - - def test_rtf_evd(self): - batch_size = 2 - channel = 4 - n_fft_bin = 129 - tensor = torch.rand(batch_size, n_fft_bin, channel, channel, dtype=self.complex_dtype) - self._assert_consistency_complex(F.rtf_evd, (tensor,)) - - @parameterized.expand( - [ - (1, True), - (3, False), - ] - ) - def test_rtf_power(self, n_iter, diagonal_loading): - channel = 4 - n_fft_bin = 10 - psd_speech = torch.rand(n_fft_bin, channel, channel, dtype=self.complex_dtype) - psd_noise = torch.rand(n_fft_bin, channel, channel, dtype=self.complex_dtype) - reference_channel = 0 - diag_eps = 1e-7 - self._assert_consistency_complex( - F.rtf_power, (psd_speech, psd_noise, reference_channel, n_iter, diagonal_loading, diag_eps) - ) - - @parameterized.expand( - [ - (1, True), - (3, False), - ] - ) - def test_rtf_power_with_tensor(self, n_iter, diagonal_loading): - channel = 4 - n_fft_bin = 10 - psd_speech = torch.rand(n_fft_bin, channel, channel, dtype=self.complex_dtype) - psd_noise = torch.rand(n_fft_bin, channel, channel, dtype=self.complex_dtype) - reference_channel = torch.zeros(channel) - reference_channel[..., 0].fill_(1) - diag_eps = 1e-7 - self._assert_consistency_complex( - F.rtf_power, (psd_speech, psd_noise, reference_channel, n_iter, diagonal_loading, diag_eps) - ) - - def test_apply_beamforming(self): - num_channels = 4 - n_fft_bin = 201 - num_frames = 10 - beamform_weights = torch.rand(n_fft_bin, num_channels, dtype=self.complex_dtype, device=self.device) - specgram = torch.rand(num_channels, n_fft_bin, num_frames, dtype=self.complex_dtype, device=self.device) - self._assert_consistency_complex(F.apply_beamforming, (beamform_weights, specgram)) - - @common_utils.nested_params( - ["convolve", "fftconvolve"], - ["full", "valid", "same"], - ) - def test_convolve(self, fn, mode): - leading_dims = (2, 3, 2) - L_x, L_y = 32, 55 - x = torch.rand(*leading_dims, L_x, dtype=self.dtype, device=self.device) - y = torch.rand(*leading_dims, L_y, dtype=self.dtype, device=self.device) - - self._assert_consistency(getattr(F, fn), (x, y, mode)) - - @common_utils.nested_params([True, False]) - def test_add_noise(self, use_lengths): - leading_dims = (2, 3) - L = 31 - - waveform = torch.rand(*leading_dims, L, dtype=self.dtype, device=self.device, requires_grad=True) - noise = torch.rand(*leading_dims, L, dtype=self.dtype, device=self.device, requires_grad=True) - if use_lengths: - lengths = torch.rand(*leading_dims, dtype=self.dtype, device=self.device, requires_grad=True) - else: - lengths = None - snr = torch.rand(*leading_dims, dtype=self.dtype, device=self.device, requires_grad=True) * 10 - - self._assert_consistency(F.add_noise, (waveform, noise, snr, lengths)) - - @common_utils.nested_params([True, False]) - def test_speed(self, use_lengths): - leading_dims = (3, 2) - T = 200 - waveform = torch.rand(*leading_dims, T, dtype=self.dtype, device=self.device, requires_grad=True) - if use_lengths: - lengths = torch.randint(1, T, leading_dims, dtype=self.dtype, device=self.device) - else: - lengths = None - self._assert_consistency(F.speed, (waveform, 1000, 1.1, lengths)) - - def test_preemphasis(self): - waveform = torch.rand(3, 2, 100, device=self.device, dtype=self.dtype) - coeff = 0.9 - self._assert_consistency(F.preemphasis, (waveform, coeff)) - - def test_deemphasis(self): - waveform = torch.rand(3, 2, 100, device=self.device, dtype=self.dtype) - coeff = 0.9 - self._assert_consistency(F.deemphasis, (waveform, coeff)) - - -class FunctionalFloat32Only(TestBaseMixin): - def test_rnnt_loss(self): - def func(tensor): - targets = torch.tensor([[1, 2]], device=tensor.device, dtype=torch.int32) - logit_lengths = torch.tensor([2], device=tensor.device, dtype=torch.int32) - target_lengths = torch.tensor([2], device=tensor.device, dtype=torch.int32) - # This is hack for those functions which are deprecated with decorators - # like @deprecated or @dropping_support. Adding the decorators breaks - # TorchScript. So here we use the private function which make the tests - # pass, but that's a lie: the public (deprecated) function doesn't - # support torchscript anymore - return F.functional._rnnt_loss(tensor, targets, logit_lengths, target_lengths) - - logits = torch.tensor( - [ - [ - [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1], [0.1, 0.1, 0.2, 0.8, 0.1]], - [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.2, 0.1, 0.1], [0.7, 0.1, 0.2, 0.1, 0.1]], - ] - ] - ) - tensor = logits.to(device=self.device, dtype=torch.float32) - self._assert_consistency(func, (tensor,)) diff --git a/test/torchaudio_unittest/transforms/torchscript_consistency_cpu_test.py b/test/torchaudio_unittest/transforms/torchscript_consistency_cpu_test.py deleted file mode 100644 index 5ba0afe2b2..0000000000 --- a/test/torchaudio_unittest/transforms/torchscript_consistency_cpu_test.py +++ /dev/null @@ -1,14 +0,0 @@ -import torch -from torchaudio_unittest.common_utils import PytorchTestCase - -from .torchscript_consistency_impl import Transforms, TransformsFloat32Only - - -class TestTransformsFloat32(Transforms, TransformsFloat32Only, PytorchTestCase): - dtype = torch.float32 - device = torch.device("cpu") - - -class TestTransformsFloat64(Transforms, PytorchTestCase): - dtype = torch.float64 - device = torch.device("cpu") diff --git a/test/torchaudio_unittest/transforms/torchscript_consistency_cuda_test.py b/test/torchaudio_unittest/transforms/torchscript_consistency_cuda_test.py deleted file mode 100644 index 425e5646ff..0000000000 --- a/test/torchaudio_unittest/transforms/torchscript_consistency_cuda_test.py +++ /dev/null @@ -1,16 +0,0 @@ -import torch -from torchaudio_unittest.common_utils import PytorchTestCase, skipIfNoCuda - -from .torchscript_consistency_impl import Transforms, TransformsFloat32Only - - -@skipIfNoCuda -class TestTransformsFloat32(Transforms, TransformsFloat32Only, PytorchTestCase): - dtype = torch.float32 - device = torch.device("cuda") - - -@skipIfNoCuda -class TestTransformsFloat64(Transforms, PytorchTestCase): - dtype = torch.float64 - device = torch.device("cuda") diff --git a/test/torchaudio_unittest/transforms/torchscript_consistency_impl.py b/test/torchaudio_unittest/transforms/torchscript_consistency_impl.py deleted file mode 100644 index 6ada3351ab..0000000000 --- a/test/torchaudio_unittest/transforms/torchscript_consistency_impl.py +++ /dev/null @@ -1,290 +0,0 @@ -"""Test suites for jit-ability and its numerical compatibility""" - -import torch -import torchaudio.transforms as T -from parameterized import parameterized -from torchaudio_unittest import common_utils -from torchaudio_unittest.common_utils import skipIfRocm, TestBaseMixin, torch_script - - -class Transforms(TestBaseMixin): - """Implements test for Transforms that are performed for different devices""" - - def _assert_consistency(self, transform, tensor, *args): - tensor = tensor.to(device=self.device, dtype=self.dtype) - transform = transform.to(device=self.device, dtype=self.dtype) - - ts_transform = torch_script(transform) - - output = transform(tensor, *args) - ts_output = ts_transform(tensor, *args) - self.assertEqual(ts_output, output) - - def _assert_consistency_complex(self, transform, tensor, *args): - assert tensor.is_complex() - tensor = tensor.to(device=self.device, dtype=self.complex_dtype) - transform = transform.to(device=self.device, dtype=self.dtype) - - ts_transform = torch_script(transform) - - output = transform(tensor, *args) - ts_output = ts_transform(tensor, *args) - self.assertEqual(ts_output, output) - - def test_Spectrogram(self): - tensor = torch.rand((1, 1000)) - self._assert_consistency(T.Spectrogram(), tensor) - - def test_Spectrogram_return_complex(self): - tensor = torch.rand((1, 1000)) - self._assert_consistency(T.Spectrogram(power=None, return_complex=True), tensor) - - def test_InverseSpectrogram(self): - tensor = common_utils.get_whitenoise(sample_rate=8000) - spectrogram = common_utils.get_spectrogram(tensor, n_fft=400, hop_length=100) - self._assert_consistency_complex(T.InverseSpectrogram(n_fft=400, hop_length=100), spectrogram) - - @skipIfRocm - def test_GriffinLim(self): - tensor = torch.rand((1, 201, 6)) - self._assert_consistency(T.GriffinLim(length=1000, rand_init=False), tensor) - - def test_AmplitudeToDB(self): - spec = torch.rand((6, 201)) - self._assert_consistency(T.AmplitudeToDB(), spec) - - def test_MelScale(self): - spec_f = torch.rand((1, 201, 6)) - self._assert_consistency(T.MelScale(n_stft=201), spec_f) - - def test_MelSpectrogram(self): - tensor = torch.rand((1, 1000)) - self._assert_consistency(T.MelSpectrogram(), tensor) - - def test_MFCC(self): - tensor = torch.rand((1, 1000)) - self._assert_consistency(T.MFCC(), tensor) - - def test_LFCC(self): - tensor = torch.rand((1, 1000)) - self._assert_consistency(T.LFCC(), tensor) - - def test_Resample(self): - sr1, sr2 = 16000, 8000 - tensor = common_utils.get_whitenoise(sample_rate=sr1) - self._assert_consistency(T.Resample(sr1, sr2), tensor) - - def test_MuLawEncoding(self): - tensor = common_utils.get_whitenoise() - self._assert_consistency(T.MuLawEncoding(), tensor) - - def test_MuLawDecoding(self): - tensor = torch.rand((1, 10)) - self._assert_consistency(T.MuLawDecoding(), tensor) - - def test_ComputeDelta(self): - tensor = torch.rand((1, 10)) - self._assert_consistency(T.ComputeDeltas(), tensor) - - def test_Fade(self): - waveform = common_utils.get_whitenoise() - fade_in_len = 3000 - fade_out_len = 3000 - self._assert_consistency(T.Fade(fade_in_len, fade_out_len), waveform) - - def test_FrequencyMasking(self): - tensor = torch.rand((10, 2, 50, 10, 2)) - self._assert_consistency(T.FrequencyMasking(freq_mask_param=60, iid_masks=False), tensor) - - def test_TimeMasking(self): - tensor = torch.rand((10, 2, 50, 10, 2)) - self._assert_consistency(T.TimeMasking(time_mask_param=30, iid_masks=False), tensor) - - def test_Vol(self): - waveform = common_utils.get_whitenoise() - self._assert_consistency(T.Vol(1.1), waveform) - - def test_SlidingWindowCmn(self): - tensor = torch.rand((1000, 10)) - self._assert_consistency(T.SlidingWindowCmn(), tensor) - - def test_Vad(self): - filepath = common_utils.get_asset_path("vad-go-mono-32000.wav") - waveform, sample_rate = common_utils.load_wav(filepath) - self._assert_consistency(T.Vad(sample_rate=sample_rate), waveform) - - def test_SpectralCentroid(self): - sample_rate = 44100 - waveform = common_utils.get_whitenoise(sample_rate=sample_rate) - self._assert_consistency(T.SpectralCentroid(sample_rate=sample_rate), waveform) - - def test_TimeStretch(self): - n_fft = 1025 - n_freq = n_fft // 2 + 1 - hop_length = 512 - fixed_rate = 1.3 - tensor = torch.rand((10, 2, n_freq, 10), dtype=torch.cfloat) - batch = 10 - num_channels = 2 - - waveform = common_utils.get_whitenoise(sample_rate=8000, n_channels=batch * num_channels) - tensor = common_utils.get_spectrogram(waveform, n_fft=n_fft) - tensor = tensor.reshape(batch, num_channels, n_freq, -1) - self._assert_consistency_complex( - T.TimeStretch(n_freq=n_freq, hop_length=hop_length, fixed_rate=fixed_rate), - tensor, - ) - - def test_PitchShift(self): - sample_rate = 8000 - n_steps = 4 - waveform = common_utils.get_whitenoise(sample_rate=sample_rate) - pitch_shift = T.PitchShift(sample_rate=sample_rate, n_steps=n_steps) - # dry-run for initializing parameters - pitch_shift(waveform) - self._assert_consistency(pitch_shift, waveform) - - def test_PSD(self): - tensor = common_utils.get_whitenoise(sample_rate=8000, n_channels=4) - spectrogram = common_utils.get_spectrogram(tensor, n_fft=400, hop_length=100) - spectrogram = spectrogram.to(self.device) - self._assert_consistency_complex(T.PSD(), spectrogram) - - def test_PSD_with_mask(self): - tensor = common_utils.get_whitenoise(sample_rate=8000, n_channels=4) - spectrogram = common_utils.get_spectrogram(tensor, n_fft=400, hop_length=100) - spectrogram = spectrogram.to(self.device) - mask = torch.rand(spectrogram.shape[-2:], device=self.device) - self._assert_consistency_complex(T.PSD(), spectrogram, mask) - - @parameterized.expand( - [ - ["ref_channel", True], - ["stv_evd", True], - ["stv_power", True], - ["ref_channel", False], - ["stv_evd", False], - ["stv_power", False], - ] - ) - def test_MVDR(self, solution, online): - tensor = common_utils.get_whitenoise(sample_rate=8000, n_channels=4) - spectrogram = common_utils.get_spectrogram(tensor, n_fft=400, hop_length=100) - mask_s = torch.rand(spectrogram.shape[-2:], device=self.device) - mask_n = torch.rand(spectrogram.shape[-2:], device=self.device) - self._assert_consistency_complex(T.MVDR(solution=solution, online=online), spectrogram, mask_s, mask_n) - - def test_rtf_mvdr(self): - tensor = common_utils.get_whitenoise(sample_rate=8000, n_channels=4) - specgram = common_utils.get_spectrogram(tensor, n_fft=400, hop_length=100) - channel, freq, _ = specgram.shape - rtf = torch.rand(freq, channel, dtype=self.complex_dtype, device=self.device) - psd_n = torch.rand(freq, channel, channel, dtype=self.complex_dtype, device=self.device) - reference_channel = 0 - self._assert_consistency_complex(T.RTFMVDR(), specgram, rtf, psd_n, reference_channel) - - def test_souden_mvdr(self): - tensor = common_utils.get_whitenoise(sample_rate=8000, n_channels=4) - specgram = common_utils.get_spectrogram(tensor, n_fft=400, hop_length=100) - channel, freq, _ = specgram.shape - psd_s = torch.rand(freq, channel, channel, dtype=self.complex_dtype, device=self.device) - psd_n = torch.rand(freq, channel, channel, dtype=self.complex_dtype, device=self.device) - reference_channel = 0 - self._assert_consistency_complex(T.SoudenMVDR(), specgram, psd_s, psd_n, reference_channel) - - @common_utils.nested_params( - ["Convolve", "FFTConvolve"], - ["full", "valid", "same"], - ) - def test_convolve(self, cls, mode): - leading_dims = (2, 3, 2) - L_x, L_y = 32, 55 - x = torch.rand(*leading_dims, L_x, dtype=self.dtype, device=self.device) - y = torch.rand(*leading_dims, L_y, dtype=self.dtype, device=self.device) - - convolve = getattr(T, cls)(mode=mode).to(device=self.device, dtype=self.dtype) - output = convolve(x, y) - ts_output = torch_script(convolve)(x, y) - self.assertEqual(ts_output, output) - - @common_utils.nested_params([True, False]) - def test_speed(self, use_lengths): - leading_dims = (3, 2) - time = 200 - waveform = torch.rand(*leading_dims, time, dtype=self.dtype, device=self.device, requires_grad=True) - - if use_lengths: - lengths = torch.randint(1, time, leading_dims, dtype=self.dtype, device=self.device) - else: - lengths = None - - speed = T.Speed(1000, 0.9).to(self.device, self.dtype) - output = speed(waveform, lengths) - ts_output = torch_script(speed)(waveform, lengths) - self.assertEqual(ts_output, output) - - @common_utils.nested_params([True, False]) - def test_speed_perturbation(self, use_lengths): - leading_dims = (3, 2) - time = 200 - waveform = torch.rand(*leading_dims, time, dtype=self.dtype, device=self.device, requires_grad=True) - - if use_lengths: - lengths = torch.randint(1, time, leading_dims, dtype=self.dtype, device=self.device) - else: - lengths = None - - speed = T.SpeedPerturbation(1000, [0.9]).to(self.device, self.dtype) - output = speed(waveform, lengths) - ts_output = torch_script(speed)(waveform, lengths) - self.assertEqual(ts_output, output) - - @common_utils.nested_params([True, False]) - def test_add_noise(self, use_lengths): - leading_dims = (2, 3) - L = 31 - - waveform = torch.rand(*leading_dims, L, dtype=self.dtype, device=self.device, requires_grad=True) - noise = torch.rand(*leading_dims, L, dtype=self.dtype, device=self.device, requires_grad=True) - if use_lengths: - lengths = torch.rand(*leading_dims, dtype=self.dtype, device=self.device, requires_grad=True) - else: - lengths = None - snr = torch.rand(*leading_dims, dtype=self.dtype, device=self.device, requires_grad=True) * 10 - - add_noise = T.AddNoise().to(self.device, self.dtype) - output = add_noise(waveform, noise, snr, lengths) - ts_output = torch_script(add_noise)(waveform, noise, snr, lengths) - self.assertEqual(ts_output, output) - - def test_preemphasis(self): - waveform = torch.rand(3, 4, 10, dtype=self.dtype, device=self.device) - preemphasis = T.Preemphasis(coeff=0.97).to(dtype=self.dtype, device=self.device) - output = preemphasis(waveform) - ts_output = torch_script(preemphasis)(waveform) - self.assertEqual(ts_output, output) - - def test_deemphasis(self): - waveform = torch.rand(3, 4, 10, dtype=self.dtype, device=self.device) - deemphasis = T.Deemphasis(coeff=0.97).to(dtype=self.dtype, device=self.device) - output = deemphasis(waveform) - ts_output = torch_script(deemphasis)(waveform) - self.assertEqual(ts_output, output) - - -class TransformsFloat32Only(TestBaseMixin): - def test_rnnt_loss(self): - logits = torch.tensor( - [ - [ - [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1], [0.1, 0.1, 0.2, 0.8, 0.1]], - [[0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.2, 0.1, 0.1], [0.7, 0.1, 0.2, 0.1, 0.1]], - ] - ] - ) - tensor = logits.to(device=self.device, dtype=torch.float32) - targets = torch.tensor([[1, 2]], device=tensor.device, dtype=torch.int32) - logit_lengths = torch.tensor([2], device=tensor.device, dtype=torch.int32) - target_lengths = torch.tensor([2], device=tensor.device, dtype=torch.int32) - - self._assert_consistency(T.RNNTLoss(), logits, targets, logit_lengths, target_lengths)