in basic_pitch/note_creation.py [0:0]
def model_frames_to_time(n_frames: int) -> np.ndarray:
    """Return a timestamp, in seconds, for each of ``n_frames`` model frames.

    The frame indices ``0 .. n_frames - 1`` are first converted to times with
    librosa using ``AUDIO_SAMPLE_RATE`` and ``FFT_HOP``. Frames are then
    grouped into consecutive windows of ``ANNOT_N_FRAMES`` frames, and every
    time in window ``k`` is shifted earlier by ``k`` times a fixed per-window
    offset.

    Args:
        n_frames: Number of frames to produce timestamps for.

    Returns:
        Array of length ``n_frames`` holding the corrected frame times.
    """
    frame_indices = np.arange(n_frames)

    # Index of the ANNOT_N_FRAMES-sized window each frame falls in (0, 1, 2, ...).
    window_index = np.floor(frame_indices / ANNOT_N_FRAMES)

    # Per-window correction: the duration of the difference between the
    # nominal window length in frames and AUDIO_N_SAMPLES expressed in hops.
    # NOTE(review): presumably this accounts for overlap between consecutive
    # inference windows -- confirm against the windowing code.
    seconds_per_frame = FFT_HOP / AUDIO_SAMPLE_RATE
    frame_surplus = ANNOT_N_FRAMES - (AUDIO_N_SAMPLES / FFT_HOP)
    # The 0.0018 term is an empirical magic number required for the
    # resulting times to align properly.
    per_window_shift = seconds_per_frame * frame_surplus + 0.0018

    uncorrected_times = librosa.core.frames_to_time(
        frame_indices,
        sr=AUDIO_SAMPLE_RATE,
        hop_length=FFT_HOP,
    )
    return uncorrected_times - (per_window_shift * window_index)