in basic_pitch/layers/nnaudio.py [0:0]
def build(self, input_shape: tf.TensorShape) -> None:
    """Precompute the CQT kernels, filters and helpers this layer needs.

    Derived state is stored on ``self``: ``lowpass_filter``, ``n_octaves``,
    ``fmin_t``, ``downsample_factor``, ``n_fft``, ``frequencies``,
    ``lengths``, ``basis``, ``cqt_kernels_real``, ``cqt_kernels_imag``,
    ``padding`` and ``reshape_input``. When ``self.earlydownsample`` is
    ``True``, ``sample_rate``, ``hop_length`` and ``earlydownsample`` are
    overwritten with the values returned by ``get_early_downsample_params``
    and ``early_downsample_filter`` is set as well.

    Args:
        input_shape: Shape of the incoming tensor. Only its rank
            (``len(input_shape)``) is used, to pick how inputs are
            reshaped to rank 3.

    Raises:
        ValueError: If the top CQT bin lies above the Nyquist frequency,
            if ``self.pad_mode`` is neither ``"constant"`` nor
            ``"reflect"``, or if the input rank is not 1, 2 or 3.
    """
    # Q is used for the early-downsampling parameters, for creating the CQT kernels,
    # and for the per-bin lengths computed below.
    Q = float(self.filter_scale) / (2 ** (1 / self.bins_per_octave) - 1)

    self.lowpass_filter = create_lowpass_filter(band_center=0.5, kernel_length=256, transition_bandwidth=0.001)

    # Number of filters required for the kernel (at most one octave's worth).
    # n_octaves determines how many resampling steps the CQT requires.
    n_filters = min(self.bins_per_octave, self.n_bins)
    self.n_octaves = int(np.ceil(float(self.n_bins) / self.bins_per_octave))

    # Lowest frequency bin for the top octave kernel.
    self.fmin_t = self.fmin * 2 ** (self.n_octaves - 1)
    remainder = self.n_bins % self.bins_per_octave

    # Top bin frequency: a full octave when n_bins divides evenly, otherwise only
    # `remainder` bins are present in the top octave.
    if remainder == 0:
        fmax_t = self.fmin_t * 2 ** ((self.bins_per_octave - 1) / self.bins_per_octave)
    else:
        fmax_t = self.fmin_t * 2 ** ((remainder - 1) / self.bins_per_octave)

    # Adjust the top-octave minimum bin so that it sits (bins_per_octave - 1) bins below fmax_t.
    self.fmin_t = fmax_t / 2 ** (1 - 1 / self.bins_per_octave)
    if fmax_t > self.sample_rate / 2:
        raise ValueError(
            "The top bin {}Hz has exceeded the Nyquist frequency, please reduce the n_bins".format(fmax_t)
        )

    if self.earlydownsample is True:  # Do early downsampling only if this flag is exactly True
        # NOTE: this overwrites sample_rate, hop_length and earlydownsample on the layer;
        # everything below uses the (possibly updated) sample rate.
        (
            self.sample_rate,
            self.hop_length,
            self.downsample_factor,
            early_downsample_filter,
            self.earlydownsample,
        ) = get_early_downsample_params(self.sample_rate, self.hop_length, fmax_t, Q, self.n_octaves, self.dtype)

        self.early_downsample_filter = early_downsample_filter
    else:
        self.downsample_factor = 1.0

    # Prepare the CQT kernels.
    basis, self.n_fft, _, _ = create_cqt_kernels(
        Q,
        self.sample_rate,
        self.fmin_t,
        n_filters,
        self.bins_per_octave,
        norm=self.basis_norm,
        topbin_check=False,
    )

    # For the normalization at the end:
    # the freqs returned by create_cqt_kernels cannot be used, since they cover
    # only the top octave bins, and we need the information for every frequency bin.
    freqs = self.fmin * 2.0 ** (np.r_[0 : self.n_bins] / float(self.bins_per_octave))
    self.frequencies = freqs

    self.lengths = np.ceil(Q * self.sample_rate / freqs)

    self.basis = basis
    # NOTE(psobot): this is where the implementation here starts to differ from CQT2010.

    # The upstream comment states these kernels are already in the frequency domain;
    # NOTE(review): that cannot be verified from this method alone - confirm against create_cqt_kernels.
    self.cqt_kernels_real = tf.expand_dims(basis.real.astype(self.dtype), 1)
    self.cqt_kernels_imag = tf.expand_dims(basis.imag.astype(self.dtype), 1)

    if self.trainable:
        self.cqt_kernels_real = tf.Variable(initial_value=self.cqt_kernels_real, trainable=True)
        self.cqt_kernels_imag = tf.Variable(initial_value=self.cqt_kernels_imag, trainable=True)

    # If center==True, the STFT window will be put in the middle, and paddings at the beginning
    # and ending are required.
    if self.pad_mode == "constant":
        self.padding = ConstantPad1D(self.n_fft // 2, 0)
    elif self.pad_mode == "reflect":
        self.padding = ReflectionPad1D(self.n_fft // 2)
    else:
        # Fail here rather than leaving self.padding undefined, which would only
        # surface later as an AttributeError when the padding is first used.
        raise ValueError(f"Unsupported pad_mode {self.pad_mode!r}; expected 'constant' or 'reflect'")

    # Normalise every supported input rank to a rank-3 tensor by inserting singleton axes.
    rank = len(input_shape)
    if rank == 2:
        self.reshape_input = lambda x: x[:, None, :]
    elif rank == 1:
        self.reshape_input = lambda x: x[None, None, :]
    elif rank == 3:
        self.reshape_input = lambda x: x
    else:
        raise ValueError(f"Input shape must be rank <= 3, found shape {input_shape}")