in realbook/layers/nnaudio.py [0:0]
def call(self, x: tf.Tensor) -> tf.Tensor:
    """Compute the constant-Q transform (CQT) of ``x`` octave by octave.

    The top octave is computed from the (optionally early-downsampled)
    input. Every further octave is computed from the signal downsampled by
    a factor of 2 with half the hop length, and is concatenated in front of
    the bins already computed along axis 1.

    Args:
        x: Input signal; it is passed through ``self.reshape_input`` before
            any processing.

    Returns:
        Depends on ``self.output_format`` (compared case-insensitively):

        * ``"magnitude"``: square root of the sum of squares over the last
          axis of the CQT, with axes 1 and 2 swapped.
        * ``"complex"``: the CQT tensor as computed (not transposed).
        * ``"phase"``: cosine and sine of the phase angle, stacked on a new
          last axis.

    Raises:
        ValueError: If ``self.normalization_type`` or ``self.output_format``
            is not one of the supported options.
    """
    x = self.reshape_input(x)

    if self.earlydownsample is True:
        x = downsampling_by_n(
            x,
            self.early_downsample_filter,
            self.downsample_factor,
            self.match_torch_exactly,
        )

    hop = self.hop_length

    # Getting the top octave CQT
    CQT = get_cqt_complex(x, self.cqt_kernels_real, self.cqt_kernels_imag, hop, self.padding)

    x_down = x  # Separate variable so the original `x` is not overwritten
    for _ in range(self.n_octaves - 1):
        # Each lower octave reuses the same kernels on a signal downsampled
        # by 2, so the hop length is halved along with it.
        hop = hop // 2
        x_down = downsampling_by_n(x_down, self.lowpass_filter, 2, self.match_torch_exactly)
        octave_cqt = get_cqt_complex(x_down, self.cqt_kernels_real, self.cqt_kernels_imag, hop, self.padding)
        # Newly computed (lower) octaves go in front of the existing bins.
        CQT = tf.concat((octave_cqt, CQT), axis=1)

    # Removing unwanted bottom bins: keep only the last `n_bins` along axis 1
    CQT = CQT[:, -self.n_bins :, :]

    # Normalizing the output with the downsampling factor; 2**(self.n_octaves-1)
    # makes it the same magnitude as the 1992 version
    CQT = CQT * self.downsample_factor

    # Normalize again to get same result as librosa
    if self.normalization_type == "librosa":
        CQT = CQT * tf.math.sqrt(tf.cast(self.lengths.reshape((-1, 1, 1)), self.dtype))
    elif self.normalization_type == "convolutional":
        pass  # No additional scaling for this mode
    elif self.normalization_type == "wrap":
        CQT = CQT * 2
    else:
        raise ValueError("The normalization_type %r is not part of our current options." % self.normalization_type)

    output_format = self.output_format.lower()

    if output_format == "magnitude":
        # Getting CQT Amplitude.
        # Transpose the output to match the output of the other spectrogram layers.
        return tf.transpose(
            tf.math.sqrt(tf.math.reduce_sum(tf.math.pow(CQT, 2), axis=-1)),
            [0, 2, 1],
        )

    if output_format == "complex":
        return CQT

    if output_format == "phase":
        # atan2(y, x): index 1 of the last axis is used as the imaginary
        # part and index 0 as the real part. Compute the angle only once.
        angle = tf.math.atan2(CQT[:, :, :, 1], CQT[:, :, :, 0])
        return tf.stack((tf.math.cos(angle), tf.math.sin(angle)), axis=-1)

    # Previously an unrecognised format fell through and silently returned
    # None; fail loudly instead, mirroring the normalization_type check.
    raise ValueError("The output_format %r is not part of our current options." % self.output_format)