bool finish_initialization()

in vireo/internal/demux/mp2ts.cpp [507:633]


  bool finish_initialization() {
    if (format_context == nullptr) {
      return false;
    }
    // Gather stream specific info
    const uint32_t num_streams = format_context->nb_streams;
    for (uint32_t stream_index = 0; stream_index < num_streams; ++stream_index) {
      AVStream* stream = format_context->streams[stream_index];
      AVCodecContext* codec_context = stream->codec;
      SampleType type = SampleType::Unknown;
      CHECK(codec_context);
      if (codec_context->codec_type == AVMEDIA_TYPE_VIDEO) {
        type = SampleType::Video;
      } else if (codec_context->codec_type == AVMEDIA_TYPE_AUDIO) {
        type = SampleType::Audio;
      } else if (codec_context->codec_type == AVMEDIA_TYPE_DATA) {
        type = SampleType::Data;
      } else {  // AVMEDIA_TYPE_UNKNOWN
        continue;
      }

      tracks(type).index = stream_index;
      THROW_IF(stream->time_base.num != 1, Unsupported);
      tracks(type).timescale = stream->time_base.den;

      if (type == SampleType::Video) {
        if (codec_context->codec_id == AV_CODEC_ID_H264) {
          video.codec = settings::Video::Codec::H264;
        } else {
          THROW_IF(codec_context->codec_id != AV_CODEC_ID_H264, Unsupported);
        }
      } else if (type == SampleType::Audio) {
        if (codec_context->codec_id == AV_CODEC_ID_AAC) {
          audio.codec = settings::Audio::Codec::AAC_Main;  // At this point we only know it's AAC, we don't know the actual profile
        } else {
          THROW_IF(codec_context->codec_id != AV_CODEC_ID_AAC, Unsupported);
        }
      } else {
        CHECK(type == SampleType::Data);
        if (codec_context->codec_id == AV_CODEC_ID_TIMED_ID3) {
          data.codec = settings::Data::Codec::TimedID3;
        }
      }
    }

    // Parse packets
    audio.cache.clear();
    video.cache.clear();
    AVPacket packet;
    while (av_read_frame(format_context.get(), &packet) >= 0) {
      SampleType type = SampleType::Unknown;
      uint32_t stream_index = packet.stream_index;
      if (stream_index == tracks(SampleType::Video).index) {
        type = SampleType::Video;
      } else if (stream_index == tracks(SampleType::Audio).index) {
        type = SampleType::Audio;
      } else if (stream_index == tracks(SampleType::Data).index) {
        type = SampleType::Data;
      }

      if (type == SampleType::Video) {
        if (video.codec == settings::Video::Codec::H264) {
          process_h264_packet(packet);
        }
      } else if (type == SampleType::Audio) {
        if (audio.codec == settings::Audio::Codec::AAC_Main ||
            audio.codec == settings::Audio::Codec::AAC_LC) {
          process_aac_packet(packet);
        }
      } else if (type == SampleType::Data) {
        process_timed_id3_packet(packet);
      }
      av_packet_unref(&packet);
    }

    // Calculate duration of the tracks from the parsed packets
    for (auto type: enumeration::Enum<SampleType>(SampleType::Video, SampleType::Caption)) {
      if (tracks(type).dts_offsets_per_packet.size()) {
        vector<uint32_t> dts_offsets_per_sample;
        if (type == SampleType::Audio) {
          CHECK(audio.samples_per_packet.size() == tracks(type).dts_offsets_per_packet.size() + 1);
        }
        for (uint32_t index = 0; index < tracks(type).dts_offsets_per_packet.size(); ++index) {
          uint32_t dts_offset_per_packet = tracks(type).dts_offsets_per_packet[index];
          uint32_t samples_per_packet = (type == SampleType::Audio) ? audio.samples_per_packet[index] : 1;
          uint32_t dts_offset_per_sample = common::round_divide(tracks(type).dts_offsets_per_packet[index], (uint32_t)1, samples_per_packet);
          tracks(type).duration += dts_offset_per_packet;
          for (uint32_t i = 0; i < samples_per_packet; ++i) {
            dts_offsets_per_sample.push_back(dts_offset_per_sample);
          }
        }
        uint64_t last_dts_offset;
        if (type == SampleType::Audio) {
          last_dts_offset = common::round_divide<uint64_t>((uint64_t) kMP2TSTimescale * AUDIO_FRAME_SIZE, audio.samples_per_packet.back(), audio.sample_rate);
        } else {
          last_dts_offset = common::median(dts_offsets_per_sample);
        }
        tracks(type).duration += last_dts_offset;
      }
    }

    // In case there were multiple audio samples per packet, PTS/DTS values have to be adjusted
    if (audio.multiple_samples_per_packet) {
      int32_t index = -1;
      uint64_t num_samples = 0;
      uint64_t current_sample = 0;
      uint32_t start_pts = 0;
      uint32_t start_dts = 0;
      for (auto& sample: tracks(SampleType::Audio).samples) {
        if (current_sample == num_samples) {
          index++;
          num_samples = audio.samples_per_packet[index];
          current_sample = 0;
          start_pts = sample.pts;
          start_dts = sample.dts;
        } else {
          int64_t dts_offset_this_sample = (int64_t) kMP2TSTimescale * current_sample * AUDIO_FRAME_SIZE / audio.sample_rate;
          CHECK(dts_offset_this_sample);
          sample.pts = (uint32_t)(start_pts + dts_offset_this_sample);
          sample.dts = (uint32_t)(start_dts + dts_offset_this_sample);
        }
        current_sample++;
      }
    }

    return true;
  }