in basic_pitch/data/datasets/slakh.py [0:0]
def process(self, element: Tuple[str, str]) -> Any:
    """Check one Slakh track and emit its id tagged with its split.

    The track's files are copied from the remote dataset into a temporary
    local mirdata "slakh" dataset, then the track is checked. Nothing is
    yielded (the track is dropped) when any of the following holds:

    * the split is ``"omitted"``;
    * the local path for one of ``self.DOWNLOAD_ATTRIBUTES`` is empty;
    * the local track reports ``is_drum``;
    * ffmpeg fails to convert the audio to ``AUDIO_SAMPLE_RATE`` /
      ``AUDIO_N_CHANNELS`` (the failure is logged);
    * the track has no notes.

    Args:
        element: A ``(track_id, split)`` pair.

    Yields:
        ``beam.pvalue.TaggedOutput(split, track_id)`` for a track that
        passes every check above.
    """
    track_id, split = element
    # Reject omitted tracks before anything else so that discarding them
    # does not depend on the function-scope imports below.
    if split == "omitted":
        return

    import shutil
    import tempfile

    import apache_beam as beam
    import ffmpeg

    from basic_pitch.constants import (
        AUDIO_N_CHANNELS,
        AUDIO_SAMPLE_RATE,
    )

    logging.info(f"Processing (track_id, split): ({track_id}, {split})")

    track_remote = self.slakh_remote.track(track_id)

    # Everything below works on a throwaway local copy; the directory (and
    # the converted wav written next to the audio) is removed on exit.
    with tempfile.TemporaryDirectory() as local_tmp_dir:
        slakh_local = mirdata.initialize("slakh", local_tmp_dir)
        track_local = slakh_local.track(track_id)

        for attr in self.DOWNLOAD_ATTRIBUTES:
            source = getattr(track_remote, attr)
            dest = getattr(track_local, attr)
            if not dest:
                return
            logging.info(f"Downloading {attr} from {source} to {dest}")
            os.makedirs(os.path.dirname(dest), exist_ok=True)
            with self.filesystem.open(source) as s, open(dest, "wb") as d:
                # Stream in chunks instead of holding the whole file
                # (audio included) in memory at once.
                shutil.copyfileobj(s, d)

        if track_local.is_drum:
            return

        local_wav_path = "{}_tmp.wav".format(track_local.audio_path)
        try:
            # The converted file itself is not used afterwards; the
            # conversion only serves to prove the audio can be processed.
            ffmpeg.input(track_local.audio_path).output(
                local_wav_path, ar=AUDIO_SAMPLE_RATE, ac=AUDIO_N_CHANNELS
            ).run()
        except Exception as e:
            # Deliberately broad: any conversion failure means "skip this
            # track", not "fail the pipeline".
            logging.info(f"Could not process {local_wav_path}. Exception: {e}")
            return

        # if there are no notes, skip this track
        if track_local.notes is None or len(track_local.notes.intervals) == 0:
            return

        yield beam.pvalue.TaggedOutput(split, track_id)