in realbook/layers/nnaudio.py [0:0]
def downsampling_by_n(x: tf.Tensor, filter_kernel: tf.Tensor, n: int, match_torch_exactly: bool = True) -> tf.Tensor:
    """
    Filter `x` with a 1D kernel and keep every `n`-th output sample.

    `x` is expected in the layout `(n_batches, channels, width)` and the result
    is returned in that same layout. `filter_kernel` is expected to be 1D; its
    only axis is used as the filter width, and it is expanded to
    `(filter_width, 1, 1)` before the convolution.
    NOTE(review): that expansion implies a single input and a single output
    channel, so `x` presumably has `channels == 1` -- confirm against callers.

    When `match_torch_exactly` is true, the width axis is zero-padded by
    `(filter_width - 1) // 2` on each side and the convolution runs with
    "VALID" padding. Otherwise the padding is left to TensorFlow via "SAME".

    NOTE(review): the original documentation says the result is subtly
    different from Torch's output but compatible with TensorFlow Lite
    (as of v2.4.1). Given the flag name, that presumably describes the "SAME"
    path -- confirm.
    """
    # conv1d is called here with its default channels-last layout, so the
    # last two axes are swapped on the way in and swapped back on the way out.
    swap_last_two = [0, 2, 1]

    if match_torch_exactly:
        # Explicit symmetric zero padding along the width axis only.
        half_width = (filter_kernel.shape[-1] - 1) // 2
        x_padded = tf.pad(x, [[0, 0], [0, 0], [half_width, half_width]])
        conv_input_nwc = tf.transpose(x_padded, swap_last_two)
        padding_mode = "VALID"
    else:
        conv_input_nwc = tf.transpose(x, swap_last_two)
        padding_mode = "SAME"

    # `[:, None, None]` turns the 1D kernel into `(filter_width, 1, 1)`.
    downsampled_nwc = tf.nn.conv1d(
        conv_input_nwc,
        filter_kernel[:, None, None],
        padding=padding_mode,
        stride=n,
    )
    return tf.transpose(downsampled_nwc, swap_last_two)