Libav（ffmpeg）将解码的视频时间戳复制到编码器答案

【问题标题】：Libav (ffmpeg) copying decoded video timestamps to encoderLibav（ffmpeg）将解码的视频时间戳复制到编码器
【发布时间】：2017-03-09 14:02:22
【问题描述】：

我正在编写一个应用程序，它从输入文件（任何编解码器、任何容器）解码单个视频流，进行一堆图像处理，并将结果编码到输出文件（单个视频流、Quicktime RLE、MOV ）。我正在使用 ffmpeg 的 libav 3.1.5（目前是 Windows 构建，但应用程序将是跨平台的）。

输入和输出帧之间存在 1:1 对应关系，我希望输出中的帧时序与输入相同。我真的，真的很难做到这一点。所以我的一般问题是：我如何可靠地（如在所有输入情况下）将输出帧时序设置为与输入相同？

我花了很长时间才通过 API 并达到我现在的目的。我整理了一个最小的测试程序来使用：

#include <cstdio>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}

using namespace std;


struct DecoderStuff {
    AVFormatContext *formatx;
    int nstream;
    AVCodec *codec;
    AVStream *stream;
    AVCodecContext *codecx;
    AVFrame *rawframe;
    AVFrame *rgbframe;
    SwsContext *swsx;
};


struct EncoderStuff {
    AVFormatContext *formatx;
    AVCodec *codec;
    AVStream *stream;
    AVCodecContext *codecx;
};


template <typename T>
static void dump_timebase (const char *what, const T *o) {
    if (o)
        printf("%s timebase: %d/%d\n", what, o->time_base.num, o->time_base.den);
    else
        printf("%s timebase: null object\n", what);
}


// reads next frame into d.rawframe and d.rgbframe. returns false on error/eof.
static bool read_frame (DecoderStuff &d) {

    AVPacket packet;
    int err = 0, haveframe = 0;

    // read
    while (!haveframe && err >= 0 && ((err = av_read_frame(d.formatx, &packet)) >= 0)) {
       if (packet.stream_index == d.nstream) {
           err = avcodec_decode_video2(d.codecx, d.rawframe, &haveframe, &packet);
       }
       av_packet_unref(&packet);
    }

    // error output
    if (!haveframe && err != AVERROR_EOF) {
        char buf[500];
        av_strerror(err, buf, sizeof(buf) - 1);
        buf[499] = 0;
        printf("read_frame: %s\n", buf);
    }

    // convert to rgb
    if (haveframe) {
        sws_scale(d.swsx, d.rawframe->data, d.rawframe->linesize, 0, d.rawframe->height,
                  d.rgbframe->data, d.rgbframe->linesize);
    }

    return haveframe;

}


// writes an output frame, returns false on error.
static bool write_frame (EncoderStuff &e, AVFrame *inframe) {

    // see note in so post about outframe here
    AVFrame *outframe = av_frame_alloc();
    outframe->format = inframe->format;
    outframe->width = inframe->width;
    outframe->height = inframe->height;
    av_image_alloc(outframe->data, outframe->linesize, outframe->width, outframe->height,
                   AV_PIX_FMT_RGB24, 1);
    //av_frame_copy(outframe, inframe);
    static int count = 0;
    for (int n = 0; n < outframe->width * outframe->height; ++ n) {
        outframe->data[0][n*3+0] = ((n+count) % 100) ? 0 : 255;
        outframe->data[0][n*3+1] = ((n+count) % 100) ? 0 : 255;
        outframe->data[0][n*3+2] = ((n+count) % 100) ? 0 : 255;
    }
    ++ count;

    AVPacket packet;
    av_init_packet(&packet);
    packet.size = 0;
    packet.data = NULL;

    int err, havepacket = 0;
    if ((err = avcodec_encode_video2(e.codecx, &packet, outframe, &havepacket)) >= 0 && havepacket) {
        packet.stream_index = e.stream->index;
        err = av_interleaved_write_frame(e.formatx, &packet);
    }

    if (err < 0) {
        char buf[500];
        av_strerror(err, buf, sizeof(buf) - 1);
        buf[499] = 0;
        printf("write_frame: %s\n", buf);
    }

    av_packet_unref(&packet);
    av_freep(&outframe->data[0]);
    av_frame_free(&outframe);

    return err >= 0;

}


int main (int argc, char *argv[]) {

    const char *infile = "wildlife.wmv";
    const char *outfile = "test.mov";
    DecoderStuff d = {};
    EncoderStuff e = {};

    av_register_all();

    // decoder
    avformat_open_input(&d.formatx, infile, NULL, NULL);
    avformat_find_stream_info(d.formatx, NULL);
    d.nstream = av_find_best_stream(d.formatx, AVMEDIA_TYPE_VIDEO, -1, -1, &d.codec, 0);
    d.stream = d.formatx->streams[d.nstream];
    d.codecx = avcodec_alloc_context3(d.codec);
    avcodec_parameters_to_context(d.codecx, d.stream->codecpar);
    avcodec_open2(d.codecx, NULL, NULL);
    d.rawframe = av_frame_alloc();
    d.rgbframe = av_frame_alloc();
    d.rgbframe->format = AV_PIX_FMT_RGB24;
    d.rgbframe->width = d.codecx->width;
    d.rgbframe->height = d.codecx->height;
    av_frame_get_buffer(d.rgbframe, 1);
    d.swsx = sws_getContext(d.codecx->width, d.codecx->height, d.codecx->pix_fmt,
                            d.codecx->width, d.codecx->height, AV_PIX_FMT_RGB24,
                            SWS_POINT, NULL, NULL, NULL);
    //av_dump_format(d.formatx, 0, infile, 0);
    dump_timebase("in stream", d.stream);
    dump_timebase("in stream:codec", d.stream->codec); // note: deprecated
    dump_timebase("in codec", d.codecx);

    // encoder
    avformat_alloc_output_context2(&e.formatx, NULL, NULL, outfile);
    e.codec = avcodec_find_encoder(AV_CODEC_ID_QTRLE);
    e.stream = avformat_new_stream(e.formatx, e.codec);
    e.codecx = avcodec_alloc_context3(e.codec);
    e.codecx->bit_rate = 4000000; // arbitrary for qtrle
    e.codecx->width = d.codecx->width;
    e.codecx->height = d.codecx->height;
    e.codecx->gop_size = 30; // 99% sure this is arbitrary for qtrle
    e.codecx->pix_fmt = AV_PIX_FMT_RGB24;
    e.codecx->time_base = d.stream->time_base; // ???
    e.codecx->flags |= (e.formatx->flags & AVFMT_GLOBALHEADER) ? AV_CODEC_FLAG_GLOBAL_HEADER : 0;
    avcodec_open2(e.codecx, NULL, NULL);
    avcodec_parameters_from_context(e.stream->codecpar, e.codecx); 
    //av_dump_format(e.formatx, 0, outfile, 1);
    dump_timebase("out stream", e.stream);
    dump_timebase("out stream:codec", e.stream->codec); // note: deprecated
    dump_timebase("out codec", e.codecx);

    // open file and write header
    avio_open(&e.formatx->pb, outfile, AVIO_FLAG_WRITE); 
    avformat_write_header(e.formatx, NULL);

    // frames
    while (read_frame(d) && write_frame(e, d.rgbframe))
        ;

    // write trailer and close file
    av_write_trailer(e.formatx);
    avio_closep(&e.formatx->pb); 

}

关于此的几点说明：

由于到目前为止我在帧计时方面的所有尝试都失败了，因此我已经从这段代码中删除了几乎所有与计时相关的内容，以重新开始。
为简洁起见，几乎省略了所有错误检查和清理。
之所以我在write_frame 中分配一个带有新缓冲区的新输出帧，而不是直接使用inframe，是因为这更能代表我的实际应用程序正在执行的操作。我的真实应用也在内部使用 RGB24，因此在此处进行转换。
我在outframe 中生成奇怪模式的原因，而不是使用例如av_copy_frame，是因为我只想要一个使用 Quicktime RLE 压缩得很好的测试模式（否则我的测试输入最终会生成一个 1.7GB 的输出文件）。
我使用的输入视频“wildlife.wmv”可以在here找到。我已经对文件名进行了硬编码。
我知道avcodec_decode_video2 和avcodec_encode_video2 已被弃用，但不在乎。它们工作得很好，我已经很难理解最新版本的 API，ffmpeg 几乎在每个版本中都会改变它们的 API，我现在真的不想和 avcodec_send_* and avcodec_receive_* 打交道。
我想我应该在passing a NULL frame to avcodec_encode_video2 之前完成以刷新一些缓冲区或其他东西，但我对此有点困惑。除非有人愿意解释让我们暂时忽略它，否则这是一个单独的问题。文档对这一点和其他所有内容一样含糊不清。
我的测试输入文件的帧率为 29.97。

现在，至于我目前的尝试。上述代码中存在以下与时间相关的字段，详细信息/混淆以粗体显示。其中有很多，因为 API 令人难以置信的复杂：

main: d.stream->time_base：输入视频流时基。 对于我的测试输入文件，这是 1/1000。
main: d.stream->codec->time_base：不知道这是什么（当你总是使用自己的新上下文时，我永远无法理解为什么AVStream 有一个AVCodecContext 字段）而且codec 字段已被弃用。 对于我的测试输入文件，这是 1/1000。
main: d.codecx->time_base：输入编解码器上下文时基。 对于我的测试输入文件，这是 0/1。我应该设置它吗？
main: e.stream->time_base：我创建的输出流的时基。 我要把它设置成什么？
main: e.stream->codec->time_base：我创建的输出流的已弃用且神秘的编解码器字段的时基。 我是否将其设置为任何值？
main: e.codecx->time_base：我创建的编码器上下文的时基。 我要把它设置成什么？
read_frame: packet.dts：读包的解码时间戳。
read_frame: packet.pts：数据包读取的表示时间戳。
read_frame: packet.duration：数据包读取的持续时间。
read_frame: d.rawframe->pts：原始帧解码的演示时间戳。 这总是 0。为什么解码器不读取它...？
read_frame: d.rgbframe->pts / write_frame: inframe->pts：解码帧转换为RGB的呈现时间戳。当前未设置任何内容。
read_frame: d.rawframe->pkt_*：从数据包中复制的字段，在阅读 this post 后发现。它们设置正确，但我不知道它们是否有用。
write_frame: outframe->pts：正在编码的帧的表示时间戳。 我应该设置这个吗？
write_frame: outframe->pkt_*：来自数据包的定时字段。 我应该设置这些吗？它们似乎被编码器忽略了。
write_frame: packet.dts：正在编码的数据包的解码时间戳。 我要把它设置成什么？
write_frame: packet.pts：正在编码的数据包的表示时间戳。 我要把它设置成什么？
write_frame: packet.duration：数据包被编码的持续时间。 我要把它设置成什么？

我尝试了以下方法，并得到了描述的结果。注意inframe 是d.rgbframe：

- 初始化e.stream->time_base = d.stream->time_base
- 初始化e.codecx->time_base = d.codecx->time_base
- 在read_frame 中设置d.rgbframe->pts = packet.dts
- 在write_frame 中设置outframe->pts = inframe->pts
- 结果：警告未设置编码器时基（自d.codecx->time_base was 0/1），段错误。
- 初始化e.stream->time_base = d.stream->time_base
- 初始化e.codecx->time_base = d.stream->time_base
- 在read_frame 中设置d.rgbframe->pts = packet.dts
- 在write_frame 中设置outframe->pts = inframe->pts
- 结果：没有警告，但 VLC 报告帧速率为 480.048（不知道这个数字来自哪里）并且文件播放太快。 ~~编码器还将packet 中的所有计时字段设置为0，这不是我的预期。~~（编辑：原来这是因为av_interleaved_write_frame，不像av_write_frame，拥有数据包并将其与一个空白交换，我正在打印调用之后的值。所以它们不会被忽略。）
- 初始化e.stream->time_base = d.stream->time_base
- 初始化e.codecx->time_base = d.stream->time_base
- 在read_frame 中设置d.rgbframe->pts = packet.dts
- 将packet 中write_frame 中的任何pts/dts/duration 设置为任何值。
- 结果：未设置有关数据包时间戳的警告。编码器似乎将所有数据包计时字段重置为 0，因此这些都没有任何影响。
- 初始化e.stream->time_base = d.stream->time_base
- 初始化e.codecx->time_base = d.stream->time_base
- 我在阅读this post 后在AVFrame 中找到了这些字段pkt_pts、pkt_dts 和pkt_duration，因此我尝试将这些字段一直复制到outframe。
- 结果：我真的抱有希望，但结果与尝试 3 相同（数据包时间戳未设置警告，结果不正确）。

我尝试了上述各种其他手摇排列，但没有任何效果。我想要做的是创建一个输出文件，它以与输入相同的时间和帧速率（在本例中为 29.97 恒定帧速率）播放。

那么我该怎么做呢？ 在这里数以千计的时间相关字段中，我该怎么做才能使输出与输入相同？以及如何以处理可能将其时间戳和时基存储在不同位置的任意视频输入格式的方式来做到这一点？我需要它始终工作。

作为参考，这里是从我的测试输入文件的视频流中读取的所有数据包和帧时间戳的表格，以了解我的测试文件的外观。没有设置任何输入数据包 pts'，与帧 pts 相同，并且由于某种原因，前 108 帧的持续时间为 0。VLC 可以正常播放文件并报告帧速率为 29.9700089：

Table is here 因为这个帖子太大了。

【问题讨论】：

@halfer 哈，我也是刚回来加个赏金的。
啊，杰森不用担心，我认为答案（和问题）非常好。

标签： c++ ffmpeg video-encoding libav

【解决方案1】：

我认为您的问题在于时基，起初有点令人困惑。

d.stream->time_base: Input video stream time base。这是输入容器中时间戳的分辨率。从av_read_frame 返回的编码帧将在此分辨率下具有其时间戳。
d.stream->codec->time_base: Not sure what this is。为了 API 兼容性，这里留下了旧的 API；您正在使用编解码器参数，因此请忽略它。
d.codecx->time_base: Input codec context time-base. For my test input file this is 0/1. Am I supposed to set it? 这是编解码器的时间戳分辨率（相对于容器）。编解码器将假定其输入编码帧具有此分辨率的时间戳，并且它还将在此分辨率的输出解码帧中设置时间戳。
e.stream->time_base: Time base of the output stream I create。与解码器相同
e.stream->codec->time_base。与 demuxer 相同 - 忽略这个。
e.codecx->time_base - 与解复用器相同

所以你需要做以下事情：

打开解复用器。这部分有效
将解码器时基设置为某个“正常”值，因为解码器可能不会这样做，并且 0/1 不好。如果未设置任何组件的任何时基，事情将无法正常工作。最简单的方法是从 demuxer 复制时基
打开解码器。它可能会更改其时基，也可能不会。
设置编码器时基。最简单的是从（现在打开的）解码器复制时基，因为您没有更改帧速率或任何东西。
打开编码器。它可能会改变它的时基
设置复用器时基。同样，最简单的方法是从编码器复制时基
打开多路复用器。它也可能改变它的时基。

现在对于每一帧：

从分路器中读取
将时间戳从解复用器转换为解码器时基。有av_packet_rescale_ts 可以帮你做到这一点
解码数据包
将帧时间戳（pts）设置为av_frame_get_best_effort_timestamp返回的值
将帧时间戳从解码器转换为编码器时基。使用av_rescale_q 或av_rescale_q_rnd
编码数据包
将时间戳从编码器转换为复用器时基。再次使用av_packet_rescale_ts

这可能有点矫枉过正，特别是编码器可能不会在打开时更改其时基（在这种情况下，您不需要转换原始帧'pts）。

关于刷新 - 您传递给编码器的帧不一定会立即编码和输出，所以是的，您应该使用 NULL 作为帧调用 avcodec_encode_video2，以让编码器知道您已完成并使其输出所有剩余的数据（与所有其他数据包一样，您需要通过复用器）。事实上，你应该重复这样做，直到它停止喷出数据包。有关一些示例，请参见 ffmpeg 内 doc/examples 文件夹中的编码示例之一。

【讨论】：

太棒了。好的，那么对于编码方面，我已经从这个答案中得到了它：在编码器方面，我将e.stream->time_base = d.stream->time_base 设置为一个初始值，然后avformat_write_header 可以根据需要更改它。我将e.codecx->time_base 设置为任何合理的值（我正在使用{1,1000}），我没有意识到这是我的选择，这是一个很大的缺失部分。然后在编码时，我从inframe 的pkt_pts 和pkt_dts 设置packet pts/dts，不设置持续时间，然后让av_packet_rescale_ts 发挥作用。它现在工作正常。现在剩下的问题是……
... 在我的测试输入流中，所有输入数据包 dts 都已设置，但 pts 未设置。如果我直接将这些复制到输出数据包，编码器会给我一个警告“未设置数据包时间戳，这将在未来停止工作”，因为 pts 未设置。那么生成输出 pts/dts 的最稳健的方法是什么？我发现像 if (pts == AV_NOPTS_VALUE) pts = dts 这样的东西适用于 this 流，但这真的是最好的方法吗？ PS 感谢您的冲洗提示。 PPS 为了确认，我会将d.codecx->time_base 设置为avcodec_open2 之前的任何合理值以防万一？
我忘了提一件事，你需要在解码帧后做frame->pts = av_frame_get_best_effort_timestamp(frame);。不要设置输出数据包的时间戳 - 编码器应该根据其输入帧的 pts 值为您执行此操作。是的，在avcodec_open2之前设置解码器时基。
这确实是一个很好的答案，其中大部分都超出了我的想象。感谢您发布它，请继续发布更多相同的内容！如果积分是你的事，我加了 +100 鼓励。

【解决方案2】：

所以，100% 感谢Andrey Turkin's amazingly clear and helpful answer，我已经正常工作了，我想分享我所做的确切事情：

在初始化期间，请理解这些初始时基中的任何一个都可能在某些时候被 libav 更改：

在分配编解码器上下文后立即将解码器编解码器上下文时基初始化为合理的值。我选择了亚毫秒级的分辨率：
```
d.codecx->time_base = { 1, 10000 };
```
在创建新流后立即初始化编码器流时基（注意：在 QtRLE 的情况下，如果我离开这个 {0,0}，它会在写入标头后由编码器设置为 {0,90000} ，但不知道其他情况会不会一样配合，所以在这里初始化）。此时从输入流中复制是安全的，尽管我注意到我也可以任意初始化它（例如 {1,10000}），它仍然可以在以后工作：
```
e.stream->time_base = d.stream->time_base;
```
分配后立即初始化编码器编解码器上下文时基。就从解码器复制而言，与流时基相同：
```
e.codecx->time_base = d.codecx->time_base;
```

我缺少的一件事是我可以设置这些时间戳，而 libav 会服从。没有限制，这取决于我，无论我设置什么解码的时间戳都将在我选择的时间基准中。我没有意识到这一点。

然后在解码时：

我所要做的就是手动填写解码帧 pts。 pkt_* 字段可忽略：
```
d.rawframe->pts = av_frame_get_best_effort_timestamp(d.rawframe);
```
由于我正在转换格式，因此我还将其复制到转换后的帧中：
```
d.rgbframe->pts = d.rawframe->pts;
```

然后，编码：

只需要设置框架的pts。 Libav 将处理数据包。所以就在编码帧之前：
```
outframe->pts = inframe->pts;
```
但是，我仍然必须手动转换数据包时间戳，这看起来很奇怪，但所有这些都非常奇怪，所以我想这是课程的标准。帧时间戳仍在解码器流时基中，因此在对帧进行编码之后但在写入数据包之前：
```
av_packet_rescale_ts(&packet, d.stream->time_base, e.stream->time_base);
```

它的工作原理主要是：我注意到 VLC 报告输入为 29.97 FPS，但输出为 30.03 FPS，我不太明白。但是，我测试过的所有媒体播放器似乎都可以正常播放。

【讨论】：