export function outputToNotesPoly()

in src/toMidi.ts [342:530]


/**
 * Decodes frame and onset activation matrices into a list of polyphonic
 * note events.
 *
 * Both matrices are indexed as `[frameIdx][freqIdx]`. The decoding runs in
 * two passes over a working copy of `frames` ("remaining energy"):
 *
 * 1. Onset pass: every local onset peak (as reported by `argRelMax`) that
 *    passes `whereGreaterThanAxis1(..., onsetThresh)` is visited from the
 *    last reported onset to the first. Each note is extended forward until
 *    the energy in its frequency bin has been below the frame threshold for
 *    `energyTolerance` consecutive frames (or the end of the input is
 *    reached). Accepted notes have their bin and the two adjacent bins
 *    zeroed in the working copy.
 * 2. Melodia pass (only when `melodiaTrick === true`): while the working
 *    copy still contains a value above the frame threshold, the current
 *    maximum is used as a seed and extended forward and backward with the
 *    same tolerance rule, zeroing the energy it walks over.
 *
 * NOTE: `frames` and `onsets` are passed to `constrainFrequency`, which
 * (per the original inline comment) modifies them in place.
 *
 * @param frames Frame activations, `[frameIdx][freqIdx]`.
 * @param onsets Onset activations, `[frameIdx][freqIdx]`.
 * @param onsetThresh Threshold forwarded to `whereGreaterThanAxis1` to
 *   select onset peaks.
 * @param frameThresh Energy threshold below which a frame counts as silent.
 *   Pass `null` to derive it from the input as `mean + std` of `frames`
 *   (computed by `meanStdDev` before frequency constraining).
 * @param minNoteLen Notes whose duration in frames is less than or equal to
 *   this value are discarded.
 * @param inferOnsets When true, `onsets` is replaced by the result of
 *   `getInferredOnsets(onsets, frames)` before peak picking.
 * @param maxFreq Forwarded to `constrainFrequency`.
 * @param minFreq Forwarded to `constrainFrequency`.
 * @param melodiaTrick Enables the second (melodia) pass described above.
 * @param energyTolerance Number of consecutive below-threshold frames that
 *   terminates a note.
 * @returns The detected notes. `pitchMidi` is `freqIdx + MIDI_OFFSET` and
 *   `amplitude` is the mean of `frames` over the note's span in its bin.
 *   Onset-pass notes come first, followed by melodia-pass notes.
 * @throws Error if the melodia pass computes a negative start frame or an
 *   end frame at or past the number of frames (internal sanity checks).
 */
export function outputToNotesPoly(
  frames: number[][],
  onsets: number[][],
  onsetThresh: number = 0.5,
  frameThresh: Optional<number> = 0.3,
  minNoteLen: number = 5,
  inferOnsets: boolean = true,
  maxFreq: Optional<number> = null,
  minFreq: Optional<number> = null,
  melodiaTrick: boolean = true,
  energyTolerance: number = 11,
): NoteEvent[] {
  // Resolve the frame threshold once into a `number`-typed binding so that it
  // stays narrowed inside the helper closures below.
  let energyThresh: number;
  if (frameThresh === null) {
    // Derive the threshold from the mean and std deviation of the flattened
    // frames. This intentionally happens before constrainFrequency runs.
    const [mean, std] = meanStdDev(frames);
    energyThresh = mean + std;
  } else {
    energyThresh = frameThresh;
  }

  const nFrames = frames.length;

  // Modifies onsets and frames in place.
  constrainFrequency(onsets, frames, maxFreq, minFreq);

  const inferredOnsets = inferOnsets
    ? getInferredOnsets(onsets, frames)
    : onsets;

  // Zeros-like matrix that keeps the onset value only at local peaks.
  const peakThresholdMatrix = inferredOnsets.map(o => o.map(() => 0));
  argRelMax(inferredOnsets).forEach(([row, col]) => {
    peakThresholdMatrix[row][col] = inferredOnsets[row][col];
  });

  const [noteStarts, freqIdxs] = whereGreaterThanAxis1(
    peakThresholdMatrix,
    onsetThresh,
  );

  // Working copy of the frame energy; rows are copied so `frames` itself is
  // only read (for amplitudes) from here on.
  const remainingEnergy = frames.map(frame => frame.slice());

  // Zeroes the given bin and its immediate frequency neighbours in one frame.
  const clearEnergyAround = (frameIdx: number, freqIdx: number): void => {
    const row = remainingEnergy[frameIdx];
    row[freqIdx] = 0;
    if (freqIdx < MAX_FREQ_IDX) {
      row[freqIdx + 1] = 0;
    }
    if (freqIdx > 0) {
      row[freqIdx - 1] = 0;
    }
  };

  // Mean of the original frame energy over the half-open span [start, end).
  const meanAmplitude = (
    start: number,
    end: number,
    freqIdx: number,
  ): number => {
    let sum = 0;
    for (let t = start; t < end; t += 1) {
      sum += frames[t][freqIdx];
    }
    return sum / (end - start);
  };

  const noteEvents: NoteEvent[] = [];

  // Onset pass. Walk the detected onsets from the last reported one to the
  // first (the same order as reversing both index arrays).
  for (let idx = noteStarts.length - 1; idx >= 0; idx -= 1) {
    const noteStartIdx = noteStarts[idx];
    const freqIdx = freqIdxs[idx];

    // Too close to the end of the audio to form a note.
    if (noteStartIdx >= nFrames - 1) {
      continue;
    }

    // Find the time index in this frequency band where the energy has stayed
    // below the threshold for `energyTolerance` consecutive frames.
    let i = noteStartIdx + 1;
    let k = 0; // number of consecutive frames below threshold
    while (i < nFrames - 1 && k < energyTolerance) {
      if (remainingEnergy[i][freqIdx] < energyThresh) {
        k += 1;
      } else {
        k = 0;
      }
      i += 1;
    }

    i -= k; // step back over the trailing below-threshold frames

    // Skip notes that are too short; their energy stays available for the
    // melodia pass.
    if (i - noteStartIdx <= minNoteLen) {
      continue;
    }

    for (let j = noteStartIdx; j < i; j += 1) {
      clearEnergyAround(j, freqIdx);
    }

    noteEvents.push({
      startFrame: noteStartIdx,
      durationFrames: i - noteStartIdx,
      pitchMidi: freqIdx + MIDI_OFFSET,
      amplitude: meanAmplitude(noteStartIdx, i, freqIdx),
    });
  }

  if (melodiaTrick === true) {
    while (globalMax(remainingEnergy) > energyThresh) {
      // Locate the (row, column) holding the largest remaining value,
      // mirroring np.unravel_index(np.argmax(remaining_energy), shape).
      const [iMid, freqIdx] = remainingEnergy.reduce(
        (prevCoord, currRow, rowIdx) => {
          const colMaxIdx = argMax(currRow)!;
          return currRow[colMaxIdx] >
            remainingEnergy[prevCoord[0]][prevCoord[1]]
            ? [rowIdx, colMaxIdx]
            : prevCoord;
        },
        [0, 0],
      );
      // Consume the seed so the outer loop always makes progress.
      remainingEnergy[iMid][freqIdx] = 0;

      // Forward pass: the threshold test reads the energy before it is
      // cleared, so the order of these two statements matters.
      let i = iMid + 1;
      let k = 0;
      while (i < nFrames - 1 && k < energyTolerance) {
        if (remainingEnergy[i][freqIdx] < energyThresh) {
          k += 1;
        } else {
          k = 0;
        }
        clearEnergyAround(i, freqIdx);
        i += 1;
      }
      const iEnd = i - 1 - k;

      // Backward pass (never visits frame 0, matching the loop bound).
      i = iMid - 1;
      k = 0;
      while (i > 0 && k < energyTolerance) {
        if (remainingEnergy[i][freqIdx] < energyThresh) {
          k += 1;
        } else {
          k = 0;
        }
        clearEnergyAround(i, freqIdx);
        i -= 1;
      }
      const iStart = i + 1 + k;

      if (iStart < 0) {
        throw new Error(`iStart is not positive! value: ${iStart}`);
      }

      if (iEnd >= nFrames) {
        throw new Error(
          `iEnd is past end of times. (iEnd, times.length): (${iEnd}, ${nFrames})`,
        );
      }

      // Note is too short: drop it. Its energy has already been removed
      // above, so it will not be picked again.
      if (iEnd - iStart <= minNoteLen) {
        continue;
      }

      // amplitude = np.mean(frames[i_start:i_end, freq_idx]); computed only
      // for notes that are actually kept.
      noteEvents.push({
        startFrame: iStart,
        durationFrames: iEnd - iStart,
        pitchMidi: freqIdx + MIDI_OFFSET,
        amplitude: meanAmplitude(iStart, iEnd, freqIdx),
      });
    }
  }

  return noteEvents;
}