def process()

in basic_pitch/data/datasets/maestro.py [0:0]


    def process(self, element: Tuple[str, str], *args: Tuple[Any, Any], **kwargs: Dict[str, Any]) -> Any:
        """Fetch one track's files into a scratch directory and emit its id if the audio is short enough.

        For every attribute name in ``self.DOWNLOAD_ATTRIBUTES`` the value read
        from the remote track is opened through ``self.filesystem`` and copied,
        chunk by chunk, to the path given by the same attribute on a local
        track rooted in a temporary directory. The duration of the local audio
        file is then measured with ``sox``.

        Args:
            element: A ``(track_id, split)`` pair.
            *args: Unused.
            **kwargs: Unused.

        Yields:
            A ``beam.pvalue.TaggedOutput`` tagged with ``split`` and carrying
            ``track_id``. Nothing is yielded when the audio lasts 15 minutes
            or longer.
        """
        import tempfile
        import sox

        track_id, split = element
        logging.info(f"Processing (track_id, split): ({track_id}, {split})")

        # 15 minutes * 60 seconds/minute
        max_duration_seconds = 15 * 60

        remote_track = self.maestro_remote.track(track_id)
        with tempfile.TemporaryDirectory() as scratch_dir:
            local_dataset = mirdata.initialize("maestro", scratch_dir)
            local_track = local_dataset.track(track_id)

            for attr_name in self.DOWNLOAD_ATTRIBUTES:
                remote_path = getattr(remote_track, attr_name)
                local_path = getattr(local_track, attr_name)
                os.makedirs(os.path.dirname(local_path), exist_ok=True)
                with self.filesystem.open(remote_path) as reader:
                    with open(local_path, "wb") as writer:
                        for chunk in read_in_chunks(reader):
                            writer.write(chunk)

            # Measure while the downloaded file still exists; the scratch
            # directory is removed as soon as this block exits.
            audio_seconds = sox.file_info.duration(local_track.audio_path)

        # Tracks at or over the limit are dropped by ending the generator
        # without yielding anything.
        if audio_seconds >= max_duration_seconds:
            return

        yield beam.pvalue.TaggedOutput(split, track_id)