auto mux()

in vireo/android/util.cpp [78:153]


auto mux(internal::demux::MP4& mp4_decoder, internal::decode::H264_BYTESTREAM& h264_bytestream_decoder, int fps_factor, int width, int height) -> mux::MP4 {
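  // Re-muxes a re-encoded (and possibly fps-decimated) H.264 bytestream with the
  // audio of the source MP4: video samples get edit-box-free timestamps, audio
  // samples before the new video start are dropped, and the audio edit boxes are
  // realigned to match.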
  THROW_IF(h264_bytestream_decoder.count() == 0, Invalid);
  THROW_IF(fps_factor <= 0, InvalidArguments);
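  // width/height must either both be overridden (> 0) or both be 0 to keep the
  // source track's dimensions (see output_video_settings below)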
  THROW_IF(!((width > 0 && height > 0) || (width == 0 && height == 0)), InvalidArguments);

  vector<encode::Sample> audio_samples;
  vector<encode::Sample> video_samples;
  vector<common::EditBox> edit_boxes;

  // Get an ordered list of trimmed video pts (we assume H264 AnnexB has frames with ordered pts)
  vector<uint64_t> valid_pts;
  for (auto sample: mp4_decoder.video_track) {
    int64_t new_pts = common::EditBox::RealPts(mp4_decoder.video_track.edit_boxes(), sample.pts);
    if (new_pts != -1) {
      valid_pts.push_back(new_pts);
    }
  }
  sort(valid_pts.begin(), valid_pts.end());
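  // After decimating by fps_factor, the re-encoded bytestream must contain exactly
  // one sample per kept video frame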
  const uint32_t num_frames = common::ceil_divide((uint32_t)valid_pts.size(), (uint32_t)1, (uint32_t)fps_factor);
  THROW_IF(num_frames != h264_bytestream_decoder.count(), Invalid);

  const int64_t video_first_pts = valid_pts[0];
  const int64_t audio_pts_offset = video_first_pts * mp4_decoder.audio_track.settings().timescale / mp4_decoder.video_track.settings().timescale;
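  // audio_pts_offset is video_first_pts rescaled into the audio timescale; audio
  // samples with a dts before this offset are dropped below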

  uint32_t video_sample_index = 0;
  uint32_t audio_sample_index = 0;
  int64_t audio_first_dts = -1;
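  // Interleave: for each encoded video sample, first emit every audio sample whose
  // dts falls before it (keeping only samples at or after audio_pts_offset, shifted
  // so the first kept audio sample starts at dts 0)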
  for (auto sample_func: h264_bytestream_decoder) {
    auto v_sample = raw_sample_convert(sample_func(), valid_pts[video_sample_index] - video_first_pts, valid_pts[video_sample_index] - video_first_pts);
    const float v_dts = (float)v_sample.dts / (mp4_decoder.video_track.settings().timescale);

    while (audio_sample_index < mp4_decoder.audio_track.count()) {
      auto a_sample = mp4_decoder.audio_track(audio_sample_index);
      const float a_dts = (float)a_sample.dts / (mp4_decoder.audio_track.settings().timescale);

      if (a_dts < v_dts) {
        // Since we remove edit boxes from the video, we adjust the audio edit boxes accordingly,
        // so we only keep the audio samples that fall within the new edit box bounds
        if (a_sample.dts >= audio_pts_offset) {
          if (audio_first_dts == -1) {
            audio_first_dts = a_sample.dts;
          }
          audio_samples.push_back(encode::Sample(mp4_decoder.audio_track(audio_sample_index).shift(-audio_first_dts)));
        }
        ++audio_sample_index;
      } else {
        break;
      }
    }
    video_samples.push_back(v_sample);
    video_sample_index += fps_factor;
  }
  // Since we remove edit boxes from the video, we need to realign the audio edit boxes accordingly
  // Also, if any audio samples were dropped during muxing, that is reflected here
  bool first_edit_box = true;
  for (auto edit_box: mp4_decoder.audio_track.edit_boxes()) {
    if (first_edit_box) {
      const int64_t start_pts = edit_box.start_pts + audio_pts_offset - audio_first_dts;
      const uint64_t duration_pts = edit_box.duration_pts - audio_pts_offset;
      edit_boxes.push_back(common::EditBox(start_pts, duration_pts, edit_box.rate, edit_box.type));
    } else {
      const int64_t start_pts = edit_box.start_pts - audio_first_dts;
      const uint64_t duration_pts = edit_box.duration_pts;
      edit_boxes.push_back(common::EditBox(start_pts, duration_pts, edit_box.rate, edit_box.type));
    }
  }

  auto output_video_settings = settings::Video { settings::Video::Codec::H264,
                                                 width ? (uint16_t)width : mp4_decoder.video_track.settings().width,
                                                 height ? (uint16_t)height : mp4_decoder.video_track.settings().height,
                                                 mp4_decoder.video_track.settings().timescale,
                                                 mp4_decoder.video_track.settings().orientation,
                                                 h264_bytestream_decoder.sps_pps() };
  return mux::MP4(functional::Audio<encode::Sample>(audio_samples, mp4_decoder.audio_track.settings()),
                  functional::Video<encode::Sample>(video_samples, output_video_settings), edit_boxes);
}
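
For orientation, here is a minimal, self-contained sketch of the timestamp arithmetic above: the rescaling of the first kept video pts into the audio timescale (audio_pts_offset) and the realignment of the first audio edit box. All constants below are made up for illustration, and none of this code is part of vireo.

#include <cstdint>
#include <iostream>

int main() {
  // Hypothetical track settings
  const uint32_t video_timescale = 30000;  // video ticks per second
  const uint32_t audio_timescale = 44100;  // audio ticks per second

  // First video pts that survives the video edit boxes (valid_pts[0])
  const int64_t video_first_pts = 60000;   // 2 seconds into the video track

  // Same instant expressed in the audio timescale (audio_pts_offset)
  const int64_t audio_pts_offset = video_first_pts * audio_timescale / video_timescale;  // 88200

  // dts of the first audio sample kept at or after that offset (audio_first_dts);
  // audio sample boundaries rarely line up exactly with the video cut
  const int64_t audio_first_dts = 88320;

  // Realignment of the first audio edit box, mirroring the loop at the end of mux()
  const int64_t old_start_pts = 0;
  const uint64_t old_duration_pts = 441000;  // 10 seconds of audio
  const int64_t new_start_pts = old_start_pts + audio_pts_offset - audio_first_dts;  // -120
  const uint64_t new_duration_pts = old_duration_pts - audio_pts_offset;             // 352800

  std::cout << "audio_pts_offset = " << audio_pts_offset << "\n"
            << "new start_pts    = " << new_start_pts << "\n"
            << "new duration_pts = " << new_duration_pts << "\n";
  return 0;
}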