in basic_pitch/data/datasets/slakh.py [0:0]
def process(self, element: List[str]) -> List[Any]:
    """Turn a batch of Slakh track ids into serialized transcription examples.

    For every track id in ``element`` the files named by
    ``self.DOWNLOAD_ATTRIBUTES`` are copied from the remote dataset into a
    fresh temporary directory, the audio is transcoded to a WAV file using
    ``AUDIO_SAMPLE_RATE`` and ``AUDIO_N_CHANNELS``, and the note, onset and
    contour annotations are converted to sparse indices on a time grid with
    step ``ANNOTATION_HOP``. The result is passed to
    ``tf_example_serialization.to_transcription_tfexample``.

    Args:
        element: Track ids to process together as one batch.

    Returns:
        A one-element list whose single item is the list of values returned
        by ``to_transcription_tfexample``, one per track id, in input order.
    """
    import shutil
    import tempfile
    import numpy as np
    import ffmpeg
    from basic_pitch.constants import (
        AUDIO_N_CHANNELS,
        AUDIO_SAMPLE_RATE,
        FREQ_BINS_CONTOURS,
        FREQ_BINS_NOTES,
        ANNOTATION_HOP,
        N_FREQ_BINS_NOTES,
        N_FREQ_BINS_CONTOURS,
    )
    from basic_pitch.data import tf_example_serialization

    logging.info(f"Processing {element}")
    batch: List[Any] = []

    for track_id in element:
        track_remote = self.slakh_remote.track(track_id)

        # Everything that touches the local copies must stay inside this
        # context: the directory and its contents are deleted on exit.
        with tempfile.TemporaryDirectory() as local_tmp_dir:
            slakh_local = mirdata.initialize("slakh", local_tmp_dir)
            track_local = slakh_local.track(track_id)

            for attr in self.DOWNLOAD_ATTRIBUTES:
                source = getattr(track_remote, attr)
                dest = getattr(track_local, attr)
                logging.info(f"Downloading {attr} from {source} to {dest}")
                os.makedirs(os.path.dirname(dest), exist_ok=True)
                with self.filesystem.open(source) as s, open(dest, "wb") as d:
                    # Stream in chunks rather than holding the whole remote
                    # file in memory at once.
                    shutil.copyfileobj(s, d)

            # Transcode to WAV at the sample rate / channel count from constants.
            local_wav_path = f"{track_local.audio_path}_tmp.wav"
            ffmpeg.input(track_local.audio_path).output(
                local_wav_path, ar=AUDIO_SAMPLE_RATE, ac=AUDIO_N_CHANNELS
            ).run()

            duration = float(ffmpeg.probe(local_wav_path)["format"]["duration"])
            # np.arange excludes its stop value, so the stop is padded by one
            # hop to let the grid reach the end of the audio.
            time_scale = np.arange(0, duration + ANNOTATION_HOP, ANNOTATION_HOP)
            n_time_frames = len(time_scale)

            note_indices, note_values = track_local.notes.to_sparse_index(
                time_scale, "s", FREQ_BINS_NOTES, "hz"
            )
            onset_indices, onset_values = track_local.notes.to_sparse_index(
                time_scale, "s", FREQ_BINS_NOTES, "hz", onsets_only=True
            )
            contour_indices, contour_values = track_local.multif0.to_sparse_index(
                time_scale, "s", FREQ_BINS_CONTOURS, "hz"
            )

            # NOTE(review): the serializer is given the WAV path, so it
            # presumably reads the file here, before the temp dir is removed.
            batch.append(
                tf_example_serialization.to_transcription_tfexample(
                    track_id,
                    "slakh",
                    local_wav_path,
                    note_indices,
                    note_values,
                    onset_indices,
                    onset_values,
                    contour_indices,
                    contour_values,
                    (n_time_frames, N_FREQ_BINS_NOTES),
                    (n_time_frames, N_FREQ_BINS_CONTOURS),
                )
            )

    logging.info(f"Finished processing batch of length {len(batch)}")
    return [batch]