in basic_pitch/data/datasets/maestro.py [0:0]
def process(self, element: Tuple[str, str], *args: Tuple[Any, Any], **kwargs: Dict[str, Any]) -> Any:
    """Emit a track id under its split tag unless its audio lasts 15 minutes or more.

    Every attribute named in ``self.DOWNLOAD_ATTRIBUTES`` is copied from the
    remote track to the matching path of a local mirdata track rooted in a
    temporary directory; the duration of the local audio file is then
    measured with sox.

    Args:
        element: A ``(track_id, split)`` pair.
        *args: Not used by this method.
        **kwargs: Not used by this method.

    Yields:
        ``beam.pvalue.TaggedOutput(split, track_id)`` when the measured
        duration is below 15 minutes; nothing otherwise.
    """
    import tempfile

    import sox

    track_id, split = element
    logging.info(f"Processing (track_id, split): ({track_id}, {split})")

    remote_track = self.maestro_remote.track(track_id)
    max_audio_seconds = 15 * 60  # 15 minutes * 60 seconds/minute

    with tempfile.TemporaryDirectory() as scratch_dir:
        local_track = mirdata.initialize("maestro", scratch_dir).track(track_id)

        # Mirror each requested remote file into the scratch directory.
        for attr_name in self.DOWNLOAD_ATTRIBUTES:
            remote_path = getattr(remote_track, attr_name)
            local_path = getattr(local_track, attr_name)
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            with self.filesystem.open(remote_path) as reader, open(local_path, "wb") as writer:
                writer.writelines(read_in_chunks(reader))

        # presumably DOWNLOAD_ATTRIBUTES includes "audio_path", so the file
        # sox inspects here has just been downloaded -- TODO confirm.
        audio_seconds = sox.file_info.duration(local_track.audio_path)

        # NOTE(review): tracks at or above the limit produce no output and
        # shorter ones are emitted -- confirm this is the intended direction
        # of the filter.
        if not (audio_seconds >= max_audio_seconds):
            yield beam.pvalue.TaggedOutput(split, track_id)