in src/toMidi.ts [342:530]
/**
 * Decodes note events from a frame-activation matrix and an onset-activation
 * matrix.
 *
 * The decoding runs in two passes:
 *  1. Onset pass: every thresholded onset peak is followed forward in time
 *     through its frequency bin until the frame energy has stayed below the
 *     frame threshold for `energyTolerance` consecutive frames.
 *  2. Optional "melodia" pass: while any energy above the frame threshold is
 *     left over, the strongest remaining cell is grown forwards and backwards
 *     in time into an additional note.
 *
 * Energy claimed by a note (its bin plus the two neighbouring bins) is zeroed
 * in a private copy of `frames` so that later notes cannot reuse it.
 *
 * NOTE: `constrainFrequency` is documented here as modifying `onsets` and
 * `frames` in place, so the caller's arrays may be altered by this call.
 *
 * @param frames Frame activations, indexed as `frames[frameIdx][freqIdx]`.
 * @param onsets Onset activations; presumably the same shape as `frames`
 *   (TODO confirm against callers).
 * @param onsetThresh Threshold handed to `whereGreaterThanAxis1` to select
 *   onset peaks.
 * @param frameThresh Energy level below which a frame counts as silent. The
 *   declared type is `number`; if an untyped caller passes `null`, the
 *   threshold falls back to mean + standard deviation as reported by
 *   `meanStdDev(frames)`.
 * @param minNoteLen Notes spanning `minNoteLen` frames or fewer are dropped.
 * @param inferOnsets When true, onsets are replaced by the result of
 *   `getInferredOnsets(onsets, frames)`.
 * @param maxFreq Forwarded to `constrainFrequency`; presumably an upper
 *   frequency bound (TODO confirm).
 * @param minFreq Forwarded to `constrainFrequency`; presumably a lower
 *   frequency bound (TODO confirm).
 * @param melodiaTrick Enables the second pass over the leftover energy.
 * @param energyTolerance Number of consecutive below-threshold frames that
 *   ends a note.
 * @returns The notes found by the onset pass, in the order the onsets were
 *   visited, followed by any notes added by the melodia pass. `pitchMidi` is
 *   the frequency bin index plus `MIDI_OFFSET`; `amplitude` is the mean of
 *   the (unmodified) `frames` values over the note's span in that bin.
 * @throws Error if the melodia pass computes a start frame below zero or an
 *   end frame at or beyond the number of frames.
 */
export function outputToNotesPoly(
  frames: number[][],
  onsets: number[][],
  onsetThresh: number = 0.5,
  frameThresh: number = 0.3,
  minNoteLen: number = 5,
  inferOnsets: boolean = true,
  maxFreq: Optional<number> = null,
  minFreq: Optional<number> = null,
  melodiaTrick: boolean = true,
  energyTolerance: number = 11,
): NoteEvent[] {
  let inferredFrameThresh = frameThresh;
  if (inferredFrameThresh === null) {
    // Only reachable when a caller bypasses the declared `number` type.
    // The statistics are taken before `constrainFrequency` touches `frames`.
    const [mean, std] = meanStdDev(frames);
    inferredFrameThresh = mean + std;
  }

  const nFrames = frames.length;

  // Modifies onsets and frames in place.
  constrainFrequency(onsets, frames, maxFreq, minFreq);

  const inferredOnsets: number[][] = inferOnsets
    ? getInferredOnsets(onsets, frames)
    : onsets;

  // Zero matrix shaped like the onsets; only the cells reported by
  // `argRelMax` keep their onset value.
  const peakThresholdMatrix = inferredOnsets.map(row => row.map(() => 0));
  argRelMax(inferredOnsets).forEach(([row, col]) => {
    peakThresholdMatrix[row][col] = inferredOnsets[row][col];
  });

  const [noteStarts, freqIdxs] = whereGreaterThanAxis1(
    peakThresholdMatrix,
    onsetThresh,
  );
  // Visit the candidates in the reverse of the order they were reported
  // (presumably latest onset first — verify against whereGreaterThanAxis1).
  noteStarts.reverse();
  freqIdxs.reverse();

  // Row-by-row copy: claimed energy is erased here, never in `frames`.
  const remainingEnergy = frames.map(frame => frame.slice());

  // Zeroes one frame of a frequency bin together with its adjacent bins.
  const clearBand = (frameIdx: number, freqIdx: number): void => {
    remainingEnergy[frameIdx][freqIdx] = 0;
    if (freqIdx < MAX_FREQ_IDX) {
      remainingEnergy[frameIdx][freqIdx + 1] = 0;
    }
    if (freqIdx > 0) {
      remainingEnergy[frameIdx][freqIdx - 1] = 0;
    }
  };

  // Mean of the original frame activations over [startFrame, endFrame).
  const meanFrameEnergy = (
    startFrame: number,
    endFrame: number,
    freqIdx: number,
  ): number =>
    frames
      .slice(startFrame, endFrame)
      .reduce((sum, row) => sum + row[freqIdx], 0) /
    (endFrame - startFrame);

  const noteEvents: NoteEvent[] = [];

  // ---- Pass 1: notes anchored on onset peaks -------------------------------
  // A plain loop is used because each iteration mutates `remainingEnergy`,
  // which later iterations read.
  for (let idx = 0; idx < noteStarts.length; idx += 1) {
    const noteStartIdx = noteStarts[idx];
    const freqIdx = freqIdxs[idx];

    // Too close to the end of the audio to hold a note.
    if (noteStartIdx >= nFrames - 1) {
      continue;
    }

    // Walk forward until the energy has been below the threshold for
    // `energyTolerance` frames in a row (or the audio ends).
    let i = noteStartIdx + 1;
    let k = 0; // consecutive frames below threshold
    while (i < nFrames - 1 && k < energyTolerance) {
      if (remainingEnergy[i][freqIdx] < inferredFrameThresh) {
        k += 1;
      } else {
        k = 0;
      }
      i += 1;
    }
    i -= k; // step back over the trailing quiet frames

    // Skip notes that are too short; their energy is left untouched.
    if (i - noteStartIdx <= minNoteLen) {
      continue;
    }

    for (let j = noteStartIdx; j < i; j += 1) {
      clearBand(j, freqIdx);
    }

    noteEvents.push({
      startFrame: noteStartIdx,
      durationFrames: i - noteStartIdx,
      pitchMidi: freqIdx + MIDI_OFFSET,
      amplitude: meanFrameEnergy(noteStartIdx, i, freqIdx),
    });
  }

  // ---- Pass 2: notes grown from the strongest leftover energy --------------
  if (melodiaTrick === true) {
    while (globalMax(remainingEnergy) > inferredFrameThresh) {
      // Equivalent of np.unravel_index(np.argmax(remaining_energy), shape):
      // the (row, column) of the largest remaining value.
      const [iMid, freqIdx] = remainingEnergy.reduce(
        (bestCoord, currRow, rowIdx) => {
          const colMaxIdx = argMax(currRow)!;
          return currRow[colMaxIdx] >
            remainingEnergy[bestCoord[0]][bestCoord[1]]
            ? [rowIdx, colMaxIdx]
            : bestCoord;
        },
        [0, 0],
      );
      // Clearing the seed cell guarantees the outer loop makes progress.
      remainingEnergy[iMid][freqIdx] = 0;

      // Forward pass: the threshold test must read the cell before it is
      // cleared.
      let i = iMid + 1;
      let k = 0;
      while (i < nFrames - 1 && k < energyTolerance) {
        if (remainingEnergy[i][freqIdx] < inferredFrameThresh) {
          k += 1;
        } else {
          k = 0;
        }
        clearBand(i, freqIdx);
        i += 1;
      }
      const iEnd = i - 1 - k; // step back over the trailing quiet frames

      // Backward pass, mirroring the forward one. Frame 0 is never visited
      // because the loop requires i > 0.
      i = iMid - 1;
      k = 0;
      while (i > 0 && k < energyTolerance) {
        if (remainingEnergy[i][freqIdx] < inferredFrameThresh) {
          k += 1;
        } else {
          k = 0;
        }
        clearBand(i, freqIdx);
        i -= 1;
      }
      const iStart = i + 1 + k; // step forward over the leading quiet frames

      if (iStart < 0) {
        throw new Error(`iStart is not positive! value: ${iStart}`);
      }
      if (iEnd >= nFrames) {
        throw new Error(
          `iEnd is past end of times. (iEnd, times.length): (${iEnd}, ${nFrames})`,
        );
      }

      if (iEnd - iStart <= minNoteLen) {
        // Note is too short: drop it. Its energy has already been removed.
        continue;
      }

      // amplitude = np.mean(frames[i_start:i_end, freq_idx]); computed only
      // for notes that are kept.
      noteEvents.push({
        startFrame: iStart,
        durationFrames: iEnd - iStart,
        pitchMidi: freqIdx + MIDI_OFFSET,
        amplitude: meanFrameEnergy(iStart, iEnd, freqIdx),
      });
    }
  }

  return noteEvents;
}