void parse_samples()

in vireo/internal/demux/mp4.cpp [415:518]


  // Populates per-track sample bookkeeping from the mp4 media timeline.
  //
  // - PCM audio: neighboring mp4 samples (one PCM frame each) are coalesced into
  //   larger Sample objects of up to AUDIO_FRAME_SIZE frames to reduce the total
  //   number of samples (MEDIASERV-6821); results are appended to audio.pcm_samples
  //   and tracks(type).sample_count is updated to the coalesced count.
  // - All other tracks: the raw timestamp list is fetched, sanitized via
  //   enforce_correct_pts (MEDIASERV-4739) and stored in tracks(type).timestamps.
  // - Video: additionally builds a pts-sorted copy of the timestamps (used for
  //   Open GOP detection) and records the index of the first sync sample so that
  //   leading non-keyframe samples of non-standard inputs can be discarded.
  //
  // NOTE(review): track_param is unused in this body — presumably kept for
  // signature parity with other parse_* methods; confirm against callers.
  void parse_samples(lsmash_track_parameters_t& track_param, SampleType type) {
    if (tracks(type).duration) {
      THROW_IF(lsmash_construct_timeline(root.get(), tracks(type).track_ID) != 0, Invalid);
      tracks(type).sample_count = lsmash_get_sample_count_in_media_timeline(root.get(), tracks(type).track_ID);
    }

    if (tracks(type).sample_count) {
      if (type == SampleType::Audio && settings::Audio::IsPCM(audio.codec)) {
        // accumulate neighboring PCM samples into larger pieces to reduce total number of samples (MEDIASERV-6821)
        uint8_t num_bytes_per_sample = sizeof(int16_t) * audio.channels;
        if (audio.codec == settings::Audio::Codec::PCM_S24LE ||
            audio.codec == settings::Audio::Codec::PCM_S24BE) {
          num_bytes_per_sample = 24 / CHAR_BIT * audio.channels;  // 3 bytes per channel for 24-bit PCM
        }
        const uint32_t max_bytes_to_accumulate = AUDIO_FRAME_SIZE * num_bytes_per_sample;
        uint32_t total_bytes = 0;  // bytes already committed to audio.pcm_samples

        auto aligned_with_audio_frame_size = [num_bytes_per_sample](uint32_t bytes) -> bool {
          return bytes % (AUDIO_FRAME_SIZE * num_bytes_per_sample) == 0;
        };
        // Emits one coalesced Sample that starts at anchor_sample and spans `size` bytes.
        // File data is read lazily: the nal functor pulls bytes only when invoked.
        auto save_anchor_sample = [_this = this, &total_bytes, num_bytes_per_sample, &aligned_with_audio_frame_size](lsmash_sample_t anchor_sample, uint32_t size) {
          THROW_IF(anchor_sample.pos > numeric_limits<uint32_t>::max(), Overflow);
          uint32_t pos = (uint32_t)anchor_sample.pos;
          auto nal = [_this, pos, size]() -> common::Data32 {
            auto nal_data = _this->reader.read(pos, size);
            THROW_IF(nal_data.count() != size, ReaderError);
            return move(nal_data);
          };
          bool keyframe = anchor_sample.prop.ra_flags & ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC;
          keyframe &= aligned_with_audio_frame_size(total_bytes);  // safe to split track at these boundaries
          _this->audio.pcm_samples.push_back(Sample(anchor_sample.cts,
                                                    anchor_sample.dts,
                                                    keyframe,
                                                    SampleType::Audio,
                                                    nal,
                                                    pos,
                                                    size));
          total_bytes += size;
        };
        // Value-initialize: prev_sample's fields are referenced below before the
        // first assignment (the result is only *used* past the first iteration,
        // but reading indeterminate values is undefined behavior).
        lsmash_sample_t anchor_sample{};
        lsmash_sample_t prev_sample{};
        uint32_t bytes_accumulated = 0;
        // uint32_t index: sample_count is unsigned; a signed counter would be a
        // signed/unsigned comparison and could overflow for very large tracks.
        for (uint32_t index = 0; index < tracks(SampleType::Audio).sample_count; ++index) {
          lsmash_sample_t sample{};
          // Timeline indices are 1-based in L-SMASH; fail loudly instead of
          // consuming an unwritten sample struct if the query fails.
          THROW_IF(lsmash_get_sample_info_from_media_timeline(root.get(), tracks(SampleType::Audio).track_ID, index + 1, &sample) != 0, Invalid);
          // Each mp4 sample must hold exactly one PCM frame for coalescing to work.
          THROW_IF(sample.length != num_bytes_per_sample, Unsupported);

          bool first_sample = index == 0;
          if (first_sample) {
            anchor_sample = sample;
          } else {
            // PCM timestamps must advance by exactly one frame per sample.
            THROW_IF(sample.cts - prev_sample.cts != 1, Unsupported);
            THROW_IF(sample.dts - prev_sample.dts != 1, Unsupported);
          }
          bool aligned = aligned_with_audio_frame_size(total_bytes + bytes_accumulated);
          bool continuous = sample.pos == prev_sample.pos + prev_sample.length;
          CHECK(bytes_accumulated <= max_bytes_to_accumulate);
          bool enough_bytes = bytes_accumulated == max_bytes_to_accumulate;
          // Close the current run when the data is no longer contiguous in the
          // file, we hit an AUDIO_FRAME_SIZE boundary, or the run is full.
          bool new_data_block = !first_sample && (!continuous || aligned || enough_bytes);
          bool last_sample = index == (tracks(SampleType::Audio).sample_count - 1);
          if (new_data_block || last_sample) {
            // save the anchor sample and mark current sample as the anchor
            if (last_sample && !new_data_block) {
              bytes_accumulated += sample.length;  // also add last sample as part of anchor sample
            }
            save_anchor_sample(anchor_sample, bytes_accumulated);
            if (last_sample && new_data_block) {
              save_anchor_sample(sample, sample.length);  // add last sample separately
            }
            anchor_sample = sample;
            bytes_accumulated = 0;
          }
          bytes_accumulated += sample.length;
          prev_sample = sample;
        }
        tracks(type).sample_count = (uint32_t)audio.pcm_samples.size();  // update sample count
      } else {
        lsmash_media_ts_list_t ts_list;
        THROW_IF(lsmash_get_media_timestamps(root.get(), tracks(type).track_ID, &ts_list) != 0, Invalid);
        // NOTE(review): if either check below fires, ts_list.timestamp (allocated
        // by L-SMASH) is leaked — ownership is only transferred on the reset()
        // line. Confirm the deleter on tracks(type).timestamps before reordering.
        THROW_IF(!ts_list.timestamp, Invalid);
        CHECK(ts_list.sample_count == tracks(type).sample_count);
        enforce_correct_pts(ts_list);  // Mitigation of MEDIASERV-4739
        tracks(type).timestamps.reset(ts_list.timestamp);
      }

      if (type == SampleType::Video) {
        // Create an additional list of pts sorted timestamps - used for Open GOP detection
        CHECK(tracks(type).timestamps);
        vector<lsmash_media_ts_t> pts_sorted_timestamps(tracks(type).timestamps.get(), tracks(type).timestamps.get() + tracks(type).sample_count);
        sort(pts_sorted_timestamps.begin(), pts_sorted_timestamps.end(), [](const lsmash_media_ts_t& a, const lsmash_media_ts_t& b){ return a.cts < b.cts; });
        swap(video.pts_sorted_timestamps, pts_sorted_timestamps);

        // Handle non-standard inputs, discard samples at the beginning of the video track until the first keyframe
        for (uint32_t index = 0; index < tracks(type).sample_count; ++index) {
          lsmash_sample_property_t sample_property{};
          // Consistent with the audio path: don't read the property struct if the query failed.
          THROW_IF(lsmash_get_sample_property_from_media_timeline(root.get(), tracks(type).track_ID, index + 1, &sample_property) != 0, Invalid);
          if (sample_property.ra_flags & ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC) {
            video.first_keyframe_index = index;
            break;
          }
        }
      }
    }
  }