auto H264::operator()

in vireo/internal/decode/h264.cpp [138:280]


auto H264::operator()(uint32_t index) const -> frame::Frame {
  THROW_IF(index >= count(), OutOfRange);
  THROW_IF(index >= _this->frame_infos.size(), OutOfRange);

  frame::Frame frame;
  frame.pts = _this->frame_infos[index].pts;
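  // Decoding is lazy: frame.yuv is a closure that runs the actual libavcodec decode only when the caller asks for pixel data.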
  frame.yuv = [_this = _this, index, keyframe = _this->frame_infos[index].keyframe]() -> frame::YUV {
    unique_ptr<AVFrame, function<void(AVFrame*)>> frame(av_frame_alloc(), [](AVFrame* frame) {
      av_frame_unref(frame);
      av_free(frame);
    });
    auto settings = _this->video_track.settings();

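    // Walk backwards from index to the closest sample that is both flagged as a keyframe and an actual IDR NAL unit.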
    auto previous_idr_frame = [&_this, &settings](uint32_t index) -> uint32_t {
      THROW_IF(index >= _this->video_track.count(), OutOfRange);
      while (index) {
        // we return 0 if we cannot find an actual IDR frame <= index
        // this ensures that we at least attempt to decode starting from the first available frame
        const Sample& sample = _this->video_track((uint32_t)index);
        bool is_idr = sample.keyframe && intra_decode_refresh(sample.nal(), settings.sps_pps.nalu_length_size);
        if (is_idr) {
          break;
        }
        index--;
      }
      return index;
    };

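    // Drop any frames still buffered inside libavcodec and reset the cached-frame counter.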
    auto flush_decoder_buffers = [&_this]() {
      avcodec_flush_buffers(_this->codec_context.get());
      _this->num_cached_frames = 0;
    };

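    // Infer display dimensions from the decoded frame, shrinking one dimension by the sample aspect ratio so non-square pixels keep their display shape.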
    auto update_resolution = [](const AVFrame* frame, settings::Video& settings) {
      settings.width = (uint16_t)frame->width;
      settings.height = (uint16_t)frame->height;
      if (frame->sample_aspect_ratio.num) {
        if (frame->sample_aspect_ratio.num < frame->sample_aspect_ratio.den) {
          settings.width = settings.width * frame->sample_aspect_ratio.num / frame->sample_aspect_ratio.den;
        } else {
          settings.height = settings.height * frame->sample_aspect_ratio.den / frame->sample_aspect_ratio.num;
        }
      }
    };

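    // Feed samples into the decoder starting at index until a picture comes out; once the track is exhausted, drain the decoder's delayed frames with empty packets.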
    auto decode_frame = [_this = _this, &frame, keyframe, &settings, update_resolution](uint32_t index) {
      THROW_IF(index >= _this->video_track.count(), OutOfRange);
      AVPacket packet;
      int got_picture = 0;
      while (!got_picture) {
        av_init_packet(&packet);
        if (index + _this->num_cached_frames < _this->video_track.count()) {
          const Sample& sample = _this->video_track(index + _this->num_cached_frames);
          const common::Data32 nal = sample.nal();
          av_new_packet(&packet, nal.count());
          memcpy((void*)packet.data, nal.data() + nal.a(), nal.count());
          packet.pts = sample.pts;
          packet.dts = sample.dts;
          packet.flags = sample.keyframe ? AV_PKT_FLAG_KEY : 0;
          CHECK(avcodec_decode_video2(_this->codec_context.get(), frame.get(), &got_picture, &packet) == packet.size);
        } else {
          CHECK(_this->num_cached_frames > 0);
          av_init_packet(&packet);
          packet.data = NULL;
          packet.size = 0;
          CHECK(avcodec_decode_video2(_this->codec_context.get(), frame.get(), &got_picture, &packet) == 0);
          if (!got_picture) {
            break;
          }
          _this->num_cached_frames--;
        }
        av_packet_unref(&packet);
        if (got_picture) {
          if (settings.width == 0 && settings.height == 0) { // infer from decoded frame when not specified in settings
            update_resolution(frame.get(), settings);
          }
          THROW_IF(frame->format != AV_PIX_FMT_YUV420P && frame->format != AV_PIX_FMT_YUVJ420P, Unsupported);
        } else {
          _this->num_cached_frames++;
          THROW_IF(_this->num_cached_frames > std::min((int)_this->video_track.count(), 32), Unsafe);
        }
      }
      if (!got_picture && keyframe) {  // TODO: remove once MEDIASERV-4386 is resolved
        THROW_IF(!intra_decode_refresh(_this->video_track(index).nal(), settings.sps_pps.nalu_length_size), Unsupported);
      }
      CHECK(got_picture);
      _this->last_decoded_index = index;
    };

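    // Decide where decoding has to start: continue from the last decoded sample, restart cleanly at this keyframe, or rewind to the previous IDR frame.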
    if (index - _this->last_decoded_index == 1) {
      // optimization: current sample is right after last decoded sample - continue normally
      decode_frame(index);
    } else {
      if (keyframe) {
        // at the beginning of a gop boundary - start fresh
        if (_this->num_cached_frames) {
          flush_decoder_buffers();
        }
        decode_frame(index);
      } else {
        uint32_t index_to_start_decoding = previous_idr_frame(index);
        if (index_to_start_decoding <= _this->last_decoded_index && index > _this->last_decoded_index) {
          // optimization: we can just decode the frames starting from last decoded index - no need to decode from previous IDR
          index_to_start_decoding = (uint32_t)(_this->last_decoded_index + 1);
        } else {
          // we have to start fresh
          flush_decoder_buffers();
        }
        THROW_IF(index - index_to_start_decoding >= security::kMaxGOPSize, Unsafe,
                 "GOP is too large (need to decode from frame " << index_to_start_decoding << " for frame " << index
                 << "; max allowed = " << security::kMaxGOPSize << ")");
        for (uint32_t current_index = index_to_start_decoding; current_index <= index; ++current_index) {
          decode_frame(current_index);
        }
      }
    }
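    // Clone the decoded frame once per plane so each plane's Data32 deleter keeps the underlying buffers referenced until that plane is released.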
    AVFrame* yFrame = av_frame_clone(frame.get());
    AVFrame* uFrame = av_frame_clone(frame.get());
    AVFrame* vFrame = av_frame_clone(frame.get());
    common::Data32 yData(frame->data[0], frame->linesize[0] * frame->height, [yFrame](uint8_t*) {
      av_frame_unref(yFrame);
      av_free(yFrame);
    });
    common::Data32 uData(frame->data[1], frame->linesize[1] * frame->height / 2, [uFrame](uint8_t*) {
      av_frame_unref(uFrame);
      av_free(uFrame);
    });
    common::Data32 vData(frame->data[2], frame->linesize[2] * frame->height / 2, [vFrame](uint8_t*) {
      av_frame_unref(vFrame);
      av_free(vFrame);
    });
    frame::Plane y((uint16_t)frame->linesize[0], (uint16_t)frame->width, (uint16_t)frame->height, move(yData));
    frame::Plane u((uint16_t)frame->linesize[1], (uint16_t)frame->width / 2, (uint16_t)frame->height / 2, move(uData));
    frame::Plane v((uint16_t)frame->linesize[2], (uint16_t)frame->width / 2, (uint16_t)frame->height / 2, move(vData));

    auto yuv = frame::YUV(move(y), move(u), move(v), false);
    return (settings.width != frame->width || settings.height != frame->height) ? move(yuv.stretch(settings.width, frame->width, settings.height, frame->height, false)) : move(yuv);
  };
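  // RGB conversion is also lazy and reuses the captured yuv closure, so requesting RGB triggers a fresh decode.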
  frame.rgb = [yuv = frame.yuv]() -> frame::RGB {
    return yuv().rgb(4);
  };
  return move(frame);
}
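
For context, a minimal usage sketch of the functor above (the decoder construction and variable names are illustrative assumptions, not taken from this file):

  decode::H264 decoder(video_track);   // hypothetical: wraps a demuxed H.264 video track
  frame::Frame frame = decoder(10);    // cheap: only pts and the lazy closures are set up
  frame::YUV yuv = frame.yuv();        // runs the actual libavcodec decode on demand
  frame::RGB rgb = frame.rgb();        // decodes again via the captured yuv closure, then converts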