in basic_pitch/data/datasets/maestro.py [0:0]
def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None:
    """Configure and launch the MAESTRO TFRecord pipeline run.

    A single launch timestamp (whole seconds since the epoch) is used both to
    resolve the output destination and to make the job name unique.

    Args:
        known_args: Parsed command-line namespace. This function reads
            ``runner``, ``sdk_container_image``, ``job_endpoint``, ``source``
            and ``batch_size`` from it, and also hands the whole namespace to
            ``commandline.resolve_destination``.
        pipeline_args: Extra arguments forwarded unchanged to
            ``pipeline.run``.
    """
    time_created = int(time.time())
    output_location = commandline.resolve_destination(known_args, time_created)

    # TODO: Remove or abstract for foss
    # NOTE(review): these keys look like Apache Beam / Dataflow pipeline
    # options -- confirm against what ``pipeline.run`` expects.
    runner_settings = {
        "runner": known_args.runner,
        "job_name": f"maestro-tfrecords-{time_created}",
        "machine_type": "e2-highmem-4",
        "num_workers": 25,
        "disk_size_gb": 128,
        "experiments": ["use_runner_v2", "no_use_multiple_sdk_containers"],
        "save_main_session": True,
        # The same container image is supplied under both keys.
        "sdk_container_image": known_args.sdk_container_image,
        "job_endpoint": known_args.job_endpoint,
        "environment_type": "DOCKER",
        "environment_config": known_args.sdk_container_image,
    }

    pipeline_input = create_input_data(known_args.source)

    # Both helpers are built from the same dataset source as the input data.
    to_tf_example = MaestroToTfExample(known_args.source, download=True)
    invalid_tracks = MaestroInvalidTracks(known_args.source)

    pipeline.run(
        runner_settings,
        pipeline_args,
        pipeline_input,
        to_tf_example,
        invalid_tracks,
        output_location,
        known_args.batch_size,
    )