in basic_pitch/data/datasets/medleydb_pitch.py [0:0]
def create_input_data(train_percent: float, seed: Optional[int] = None) -> List[Tuple[str, str]]:
    """Randomly assign every medleydb_pitch track id to a data split.

    The track ids are shuffled with the module-level ``random`` generator and
    the leading ``train_percent`` fraction of the shuffled list is labelled
    "train"; every remaining id is labelled "validation".

    Args:
        train_percent: Fraction of the tracks to label "train". Must be
            strictly less than 1.0.
        seed: Optional seed for the global ``random`` generator. Any integer,
            including 0, seeds the generator before shuffling. ``None``
            leaves the generator state as it is.

    Returns:
        A list of ``(track_id, split)`` tuples in shuffled order, where
        ``split`` is either "train" or "validation".

    Raises:
        AssertionError: If ``train_percent`` is not below 1.0.
    """
    assert train_percent < 1.0, "Don't over allocate the data!"

    # Compare against None rather than relying on truthiness: 0 is a valid
    # seed and must not be silently ignored.
    if seed is not None:
        random.seed(seed)

    medleydb_pitch = mirdata.initialize("medleydb_pitch")
    # Shuffle a local copy so the sequence exposed by the dataset object is
    # never reordered in place.
    track_ids = list(medleydb_pitch.track_ids)
    random.shuffle(track_ids)

    # Indices strictly below this (possibly fractional) cutoff are "train".
    train_cutoff = len(track_ids) * train_percent

    def determine_split(index: int) -> str:
        return "train" if index < train_cutoff else "validation"

    return [(track_id, determine_split(i)) for i, track_id in enumerate(track_ids)]