in basic_pitch/data/datasets/medleydb_pitch.py [0:0]
def process(self, element: List[str], *args: Tuple[Any, Any], **kwargs: Dict[str, Any]) -> List[Any]:
    """Build one transcription example per MedleyDB-Pitch track id.

    For every id in ``element`` the files named by ``self.DOWNLOAD_ATTRIBUTES``
    are copied from the remote dataset into a throwaway local directory, the
    audio is re-encoded with sox using ``AUDIO_SAMPLE_RATE`` and
    ``AUDIO_N_CHANNELS``, and the note, onset and contour annotations are
    converted to sparse indices on a time grid stepped by ``ANNOTATION_HOP``.

    Args:
        element: Track ids to process.
        *args: Unused.
        **kwargs: Unused.

    Returns:
        A one-element list wrapping the list of values produced by
        ``tf_example_serialization.to_transcription_tfexample``, one per
        track id, in input order.
    """
    import tempfile
    import numpy as np
    import sox
    from basic_pitch.constants import (
        AUDIO_N_CHANNELS,
        AUDIO_SAMPLE_RATE,
        FREQ_BINS_CONTOURS,
        FREQ_BINS_NOTES,
        ANNOTATION_HOP,
        N_FREQ_BINS_NOTES,
        N_FREQ_BINS_CONTOURS,
    )
    from basic_pitch.dataset import tf_example_serialization

    logging.info(f"Processing {element}")
    examples = []

    for track_id in element:
        remote_track = self.medleydb_pitch_remote.track(track_id)

        # Everything below needs the local copies, so it all happens while
        # the temporary directory still exists.
        with tempfile.TemporaryDirectory() as scratch_dir:
            local_dataset = mirdata.initialize("medleydb_pitch", scratch_dir)
            local_track = local_dataset.track(track_id)

            # Mirror each required remote file to the matching local path.
            for attr_name in self.DOWNLOAD_ATTRIBUTES:
                remote_path = getattr(remote_track, attr_name)
                local_path = getattr(local_track, attr_name)
                os.makedirs(os.path.dirname(local_path), exist_ok=True)
                with self.filesystem.open(remote_path) as reader:
                    with open(local_path, "wb") as writer:
                        writer.write(reader.read())

            # The converted wav is written next to the source audio, i.e.
            # inside the temporary directory, so it is removed along with it.
            source_audio = local_track.audio_path
            wav_path = "{}_tmp.wav".format(source_audio)
            resampler = sox.Transformer()
            resampler.rate(AUDIO_SAMPLE_RATE)
            resampler.channels(AUDIO_N_CHANNELS)
            resampler.build(source_audio, wav_path)

            # Time grid from 0 up to (at least) the converted file's duration.
            wav_duration = sox.file_info.duration(wav_path)
            frame_times = np.arange(0, wav_duration + ANNOTATION_HOP, ANNOTATION_HOP)
            n_frames = len(frame_times)

            pyin_notes = local_track.notes_pyin
            if pyin_notes is None:
                # No note annotation for this track: emit empty note/onset data.
                note_shape = (0, 0)
                note_indices, note_values = [], []
                onset_indices, onset_values = [], []
            else:
                note_grid = (frame_times, "s", FREQ_BINS_NOTES, "hz")
                note_indices, note_values = pyin_notes.to_sparse_index(*note_grid)
                onset_indices, onset_values = pyin_notes.to_sparse_index(*note_grid, onsets_only=True)
                note_shape = (n_frames, N_FREQ_BINS_NOTES)

            contour_indices, contour_values = local_track.pitch.to_sparse_index(
                frame_times, "s", FREQ_BINS_CONTOURS, "hz"
            )
            contour_shape = (n_frames, N_FREQ_BINS_CONTOURS)

            example = tf_example_serialization.to_transcription_tfexample(
                track_id,
                "medleydb_pitch",
                wav_path,
                note_indices,
                note_values,
                onset_indices,
                onset_values,
                contour_indices,
                contour_values,
                note_shape,
                contour_shape,
            )
            examples.append(example)

    return [examples]