in vireo/internal/demux/mp4.cpp [415:518]
void parse_samples(lsmash_track_parameters_t& track_param, SampleType type) {
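  // Construct the media timeline for this track (needed for the per-sample queries below)
  // and cache its sample count.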
  if (tracks(type).duration) {
    THROW_IF(lsmash_construct_timeline(root.get(), tracks(type).track_ID) != 0, Invalid);
    tracks(type).sample_count = lsmash_get_sample_count_in_media_timeline(root.get(), tracks(type).track_ID);
  }
  if (tracks(type).sample_count) {
    if (type == SampleType::Audio && settings::Audio::IsPCM(audio.codec)) {
      // accumulate neighboring PCM samples into larger pieces to reduce total number of samples (MEDIASERV-6821)
      uint8_t num_bytes_per_sample = sizeof(int16_t) * audio.channels;
      if (audio.codec == settings::Audio::Codec::PCM_S24LE ||
          audio.codec == settings::Audio::Codec::PCM_S24BE) {
        num_bytes_per_sample = 24 / CHAR_BIT * audio.channels;
      }
      const uint32_t max_bytes_to_accumulate = AUDIO_FRAME_SIZE * num_bytes_per_sample;
      uint32_t total_bytes = 0;
      auto aligned_with_audio_frame_size = [num_bytes_per_sample](uint32_t bytes) -> bool {
        return bytes % (AUDIO_FRAME_SIZE * num_bytes_per_sample) == 0;
      };
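      // save_anchor_sample emits one coalesced PCM sample of `size` bytes starting at
      // anchor_sample.pos; it is marked as a keyframe only when the anchor is a sync sample
      // and the running byte offset lands on a whole audio frame boundary.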
      auto save_anchor_sample = [_this = this, &total_bytes, num_bytes_per_sample, &aligned_with_audio_frame_size](lsmash_sample_t anchor_sample, uint32_t size) {
        THROW_IF(anchor_sample.pos > numeric_limits<uint32_t>::max(), Overflow);
        uint32_t pos = (uint32_t)anchor_sample.pos;
        auto nal = [_this, pos, size]() -> common::Data32 {
          auto nal_data = _this->reader.read(pos, size);
          THROW_IF(nal_data.count() != size, ReaderError);
          return move(nal_data);
        };
        bool keyframe = anchor_sample.prop.ra_flags & ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC;
        keyframe &= aligned_with_audio_frame_size(total_bytes);  // safe to split track at these boundaries
        _this->audio.pcm_samples.push_back(Sample(anchor_sample.cts,
                                                  anchor_sample.dts,
                                                  keyframe,
                                                  SampleType::Audio,
                                                  nal,
                                                  pos,
                                                  size));
        total_bytes += size;
      };
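      // Walk the raw PCM samples in decode order and coalesce contiguous runs into blocks of
      // at most AUDIO_FRAME_SIZE samples; anchor_sample marks the start of the block being built.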
      lsmash_sample_t anchor_sample = {};
      lsmash_sample_t prev_sample = {};  // zero-initialize so the first-iteration reads below are well defined
      uint32_t bytes_accumulated = 0;
      for (uint32_t index = 0; index < tracks(SampleType::Audio).sample_count; ++index) {
        lsmash_sample_t sample;
        lsmash_get_sample_info_from_media_timeline(root.get(), tracks(SampleType::Audio).track_ID, index + 1, &sample);
        THROW_IF(sample.length != num_bytes_per_sample, Unsupported);
        bool first_sample = index == 0;
        if (first_sample) {
          anchor_sample = sample;
        } else {
          THROW_IF(sample.cts - prev_sample.cts != 1, Unsupported);
          THROW_IF(sample.dts - prev_sample.dts != 1, Unsupported);
        }
        bool aligned = aligned_with_audio_frame_size(total_bytes + bytes_accumulated);
        bool continuous = sample.pos == prev_sample.pos + prev_sample.length;
        CHECK(bytes_accumulated <= max_bytes_to_accumulate);
        bool enough_bytes = bytes_accumulated == max_bytes_to_accumulate;
        bool new_data_block = !first_sample && (!continuous || aligned || enough_bytes);
        bool last_sample = index == (tracks(SampleType::Audio).sample_count - 1);
        if (new_data_block || last_sample) {
          // save the anchor sample and mark current sample as the anchor
          if (last_sample && !new_data_block) {
            bytes_accumulated += sample.length;  // also add last sample as part of anchor sample
          }
          save_anchor_sample(anchor_sample, bytes_accumulated);
          if (last_sample && new_data_block) {
            save_anchor_sample(sample, sample.length);  // add last sample separately
          }
          anchor_sample = sample;
          bytes_accumulated = 0;
        }
        bytes_accumulated += sample.length;
        prev_sample = sample;
      }
      tracks(type).sample_count = (uint32_t)audio.pcm_samples.size();  // update sample count
    } else {
      lsmash_media_ts_list_t ts_list;
      THROW_IF(lsmash_get_media_timestamps(root.get(), tracks(type).track_ID, &ts_list) != 0, Invalid);
      THROW_IF(!ts_list.timestamp, Invalid);
      CHECK(ts_list.sample_count == tracks(type).sample_count);
      enforce_correct_pts(ts_list);  // Mitigation of MEDIASERV-4739
      tracks(type).timestamps.reset(ts_list.timestamp);
    }
    if (type == SampleType::Video) {
      // Create an additional list of pts sorted timestamps - used for Open GOP detection
      CHECK(tracks(type).timestamps);
      vector<lsmash_media_ts_t> pts_sorted_timestamps(tracks(type).timestamps.get(), tracks(type).timestamps.get() + tracks(type).sample_count);
      sort(pts_sorted_timestamps.begin(), pts_sorted_timestamps.end(), [](const lsmash_media_ts_t& a, const lsmash_media_ts_t& b){ return a.cts < b.cts; });
      swap(video.pts_sorted_timestamps, pts_sorted_timestamps);
      // Handle non-standard inputs, discard samples at the beginning of the video track until the first keyframe
      for (uint32_t index = 0; index < tracks(type).sample_count; ++index) {
        lsmash_sample_property_t sample_property;
        lsmash_get_sample_property_from_media_timeline(root.get(), tracks(type).track_ID, index + 1, &sample_property);
        if (sample_property.ra_flags & ISOM_SAMPLE_RANDOM_ACCESS_FLAG_SYNC) {
          video.first_keyframe_index = index;
          break;
        }
      }
    }
  }
}