Format negotiation in libavfilter


Nicolas George



FFmpeg and libavfilter

FFmpeg

Libraries and programs to decode / encode / convert / modify multimedia files and data.

libavfilter

Component of FFmpeg for non-linear filtering.

All source code from FFmpeg 2.8.

Many pixel formats

187 formats for now

enum AVPixelFormat {
    AV_PIX_FMT_NONE = -1,
    AV_PIX_FMT_YUV420P,   ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
    AV_PIX_FMT_YUYV422,   ///< packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
    AV_PIX_FMT_RGB24,     ///< packed RGB 8:8:8, 24bpp, RGBRGB...
    AV_PIX_FMT_BGR24,     ///< packed RGB 8:8:8, 24bpp, BGRBGR...
    AV_PIX_FMT_YUV422P,   ///< planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
    AV_PIX_FMT_YUV444P,   ///< planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
    AV_PIX_FMT_ARGB,      ///< packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
    AV_PIX_FMT_RGBA,      ///< packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
    AV_PIX_FMT_ABGR,      ///< packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
    AV_PIX_FMT_BGRA,      ///< packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
    AV_PIX_FMT_RGB48BE,   ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as big-endian
    AV_PIX_FMT_RGB48LE,   ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as little-endian
    AV_PIX_FMT_VDPAU,     ///< HW acceleration through VDPAU, Picture.data[3] contains a VdpVideoSurface
    AV_PIX_FMT_VDA,          ///< HW acceleration through VDA, data[3] contains a CVPixelBufferRef

Similar formats for audio

Sample formats: depth, packed or planar, 10 formats

enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double
    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar

Channel layouts: known or unknown

#define AV_CH_FRONT_LEFT             0x00000001
#define AV_CH_FRONT_RIGHT            0x00000002
#define AV_CH_FRONT_CENTER           0x00000004
#define AV_CH_LAYOUT_STEREO            (AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT)
#define AV_CH_LAYOUT_2POINT1           (AV_CH_LAYOUT_STEREO|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_1               (AV_CH_LAYOUT_STEREO|AV_CH_BACK_CENTER)

Sample frequency: fixed or arbitrary value

Handling in codecs

Decoders: output the most convenient format


Encoders: list of supported formats

typedef struct AVCodec {
    const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0}
    const enum AVPixelFormat *pix_fmts;     ///< array of supported pixel formats, or NULL if unknown, array is terminated by -1
    const int *supported_samplerates;       ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
    const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
    const uint64_t *channel_layouts;         ///< array of support channel layouts, or NULL if unknown. array is terminated by 0

    .sample_fmts     = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_FLT,
                                                      AV_SAMPLE_FMT_NONE },
    .channel_layouts = ff_vorbis_channel_layouts,
    .supported_samplerates = libopus_sample_rates,

The problem for filters

Constraints:

Several input/outputs: consistency constraints

Implement all formats in all filters: too much work, useless

Use generic code for all formats: too complex, not really possible

Convert to a universal format: too inefficient


Solution:

select a format that works for all filters

if not possible, insert conversion filters at selected places

Common cases

“Pass-through” filters

examples: delogo, curves, crop


Conversion filters


examples: scale, aresample

Uncommon cases

amerge

layout = union of in layouts

overlay

Both inputs must be similar except for alpha;

output identical to main input

histogram

Many in formats;

out format must have similar bit depth

Implementation


The AVFilterFormats structure

/**
 * A shared list of media formats (e.g. pixel formats or sample rates)
 * supported on one side of a link.
 *
 * The list keeps track of every pointer that refers to it: each entry of
 * refs is the address of a pointer currently pointing to this structure.
 * This lets the merging code redirect all owners to a new list at once.
 */
struct AVFilterFormats {
    unsigned nb_formats;        ///< number of formats
    int *formats;               ///< list of media formats
    unsigned refcount;          ///< number of references to this list
    struct AVFilterFormats ***refs; ///< references to this list
};

The query_formats() method

    /**
     * Query formats supported by the filter on its inputs and outputs.
     *
     * This callback is called after the filter is initialized (so the inputs
     * and outputs are fixed), shortly before the format negotiation. This
     * callback may be called more than once.
     */
    int (*query_formats)(AVFilterContext *);

Example: alphamerge

/**
 * Declare the formats supported by alphamerge.
 *
 * The main input and the output reference the same list of alpha-capable
 * pixel formats; the alpha input only accepts AV_PIX_FMT_GRAY8.
 *
 * @param ctx filter context; inputs[0], inputs[1] and outputs[0] are used
 * @return 0 on success, a negative AVERROR code on failure
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat main_fmts[] = {
        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA420P,
        AV_PIX_FMT_RGBA, AV_PIX_FMT_BGRA, AV_PIX_FMT_ARGB, AV_PIX_FMT_ABGR,
        AV_PIX_FMT_NONE
    };
    static const enum AVPixelFormat alpha_fmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE };
    AVFilterFormats *main_formats  = ff_make_format_list(main_fmts);
    AVFilterFormats *alpha_formats = ff_make_format_list(alpha_fmts);
    int ret;

    /* NOTE(review): assumes ff_make_format_list() returns NULL when it
     * cannot allocate the list -- confirm against formats.h. */
    if (!main_formats || !alpha_formats) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    if ((ret = ff_formats_ref(main_formats,  &ctx->inputs[0]->out_formats)) < 0 ||
        (ret = ff_formats_ref(alpha_formats, &ctx->inputs[1]->out_formats)) < 0 ||
        (ret = ff_formats_ref(main_formats,  &ctx->outputs[0]->in_formats)) < 0)
        goto fail;

    return 0;

fail:
    /* Only free lists that no link points to yet; a list that already has
     * references must stay alive for the links that refer to it. */
    if (main_formats && !main_formats->refcount) {
        av_freep(&main_formats->formats);
        av_freep(&main_formats->refs);
        av_freep(&main_formats);
    }
    if (alpha_formats && !alpha_formats->refcount) {
        av_freep(&alpha_formats->formats);
        av_freep(&alpha_formats->refs);
        av_freep(&alpha_formats);
    }
    return ret;
}

ff_formats_ref()

/**
 * Shared body of the *_ref() functions: register ref as one more reference
 * to the list f and make *ref point to f.
 *
 * The macro expands to a complete function body, including its return
 * statements, so it must be the only content of the enclosing function.
 *
 * Result of the enclosing function:
 * - AVERROR_BUG     if ref is NULL;
 * - AVERROR(ENOMEM) if f is NULL (typically a failed list allocation) or
 *                   if the refs array cannot be enlarged; f is left
 *                   unchanged in that case;
 * - 0               on success.
 */
#define FORMATS_REF(f, ref)                                                     \
    void *tmp;                                                                  \
                                                                                \
    if (!ref)                                                                   \
        return AVERROR_BUG;                                                     \
    if (!f)                                                                     \
        return AVERROR(ENOMEM);                                                 \
                                                                                \
    tmp = av_realloc_array(f->refs, f->refcount + 1, sizeof(*f->refs));         \
    if (!tmp)                                                                   \
        return AVERROR(ENOMEM);                                                 \
    f->refs = tmp;                                                              \
    f->refs[f->refcount++] = ref;                                               \
    *ref = f;                                                                   \
    return 0

/**
 * Make *ref point to the channel layouts list f and record ref in the
 * list's reference array.
 *
 * @param f   list to reference
 * @param ref address of the pointer that will refer to f
 * @return 0 on success, AVERROR_BUG if ref is NULL, AVERROR(ENOMEM) if the
 *         reference array cannot be enlarged
 */
int ff_channel_layouts_ref(AVFilterChannelLayouts *f, AVFilterChannelLayouts **ref)
{
    FORMATS_REF(f, ref);
}

/**
 * Make *ref point to the formats list f and record ref in the list's
 * reference array.
 *
 * @param f   list to reference
 * @param ref address of the pointer that will refer to f
 * @return 0 on success, AVERROR_BUG if ref is NULL, AVERROR(ENOMEM) if the
 *         reference array cannot be enlarged
 */
int ff_formats_ref(AVFilterFormats *f, AVFilterFormats **ref)
{
    FORMATS_REF(f, ref);
}

Expressing constraints

scale: in and out formats are independent

vstack: all formats must be the same

ff_set_common_formats()

/**
 * Function body that attaches the single list fmts to every link of ctx
 * that has no list yet, so all those links share the same list.
 *
 * ctx      filter context whose inputs and outputs are visited
 * fmts     list to share
 * in_fmts  name of the link field used on the filter's output links
 * out_fmts name of the link field used on the filter's input links
 * ref      function used to reference the list (called as ref(fmts, &field))
 * list     name of the array field inside fmts, freed if the list is unused
 *
 * NULL links and links whose field is already set are skipped. If no link
 * took a reference, the list is freed. The enclosing function returns
 * AVERROR_BUG when fmts is NULL, the error returned by ref if a call
 * fails, and 0 otherwise.
 */
#define SET_COMMON_FORMATS(ctx, fmts, in_fmts, out_fmts, ref, list) \
    int count = 0, i;                                               \
    if (!fmts)                                                      \
        return AVERROR_BUG;                                         \
    for (i = 0; i < ctx->nb_inputs; i++) {                          \
        if (ctx->inputs[i] && !ctx->inputs[i]->out_fmts) {          \
            int ret = ref(fmts, &ctx->inputs[i]->out_fmts);         \
            if (ret < 0)                                            \
                return ret;                                         \
            count++;                                                \
        }                                                           \
    }                                                               \
    for (i = 0; i < ctx->nb_outputs; i++) {                         \
        if (ctx->outputs[i] && !ctx->outputs[i]->in_fmts) {         \
            int ret = ref(fmts, &ctx->outputs[i]->in_fmts);         \
            if (ret < 0)                                            \
                return ret;                                         \
            count++;                                                \
        }                                                           \
    }                                                               \
    if (!count) {                                                   \
        av_freep(&fmts->list);                                      \
        av_freep(&fmts->refs);                                      \
        av_freep(&fmts);                                            \
    }                                                               \
    return 0;

Merging formats lists



ff_merge_formats()

/**
 * Build in ret the intersection of lists a and b, then transfer all
 * references of a and b to ret.
 *
 * ret  variable receiving the newly allocated merged list
 * a, b lists to merge; both are freed by MERGE_REF on success
 * fmts name of the array field holding the entries
 * nb   name of the field holding the number of entries
 * type structure type of the lists, passed on to MERGE_REF
 * fail label jumped to when an allocation fails or when the two lists
 *      have no entry in common
 *
 * Common entries are stored in the order in which they appear in a. The
 * result array is sized for min(a->nb, b->nb) entries; finding more
 * matches than that is reported as duplicate entries, in which case the
 * partial result is freed and the enclosing function returns NULL
 * directly, without going through the fail label.
 */
#define MERGE_FORMATS(ret, a, b, fmts, nb, type, fail)                          \
do {                                                                            \
    int i, j, k = 0, count = FFMIN(a->nb, b->nb);                               \
    if (!(ret = av_mallocz(sizeof(*ret))))                                      \
        goto fail;                                                              \
    if (count) {                                                                \
        if (!(ret->fmts = av_malloc_array(count, sizeof(*ret->fmts))))          \
            goto fail;                                                          \
        for (i = 0; i < a->nb; i++)                                             \
            for (j = 0; j < b->nb; j++)                                         \
                if (a->fmts[i] == b->fmts[j]) {                                 \
                    if(k >= FFMIN(a->nb, b->nb)){                               \
                        av_log(NULL, AV_LOG_ERROR, "Duplicate formats in avfilter_merge_formats() detected\n"); \
                        av_free(ret->fmts);                                     \
                        av_free(ret);                                           \
                        return NULL;                                            \
                    }                                                           \
                    ret->fmts[k++] = a->fmts[i];                                \
                }                                                               \
    }                                                                           \
    ret->nb = k;                                                                \
    /* check that there was at least one common format */                       \
    if (!ret->nb)                                                               \
        goto fail;                                                              \
    MERGE_REF(ret, a, fmts, type, fail);                                        \
    MERGE_REF(ret, b, fmts, type, fail);                                        \
} while (0)

MERGE_REF()

/**
 * Move every reference held on list a over to list ret, then free a.
 *
 * ret  list that takes over the references
 * a    list being absorbed; its refs array, its fmts array and the
 *      structure itself are freed, and a is set to NULL
 * fmts name of the entries array field of a to free
 * type structure type of the lists
 * fail label jumped to if ret->refs cannot be enlarged; nothing has been
 *      modified at that point
 *
 * Each pointer recorded in a->refs is appended to ret->refs and rewritten
 * to point to ret, so all former owners of a now see the merged list.
 */
#define MERGE_REF(ret, a, fmts, type, fail)                                \
do {                                                                       \
    type ***tmp;                                                           \
    int i;                                                                 \
                                                                           \
    if (!(tmp = av_realloc_array(ret->refs, ret->refcount + a->refcount,   \
                                 sizeof(*tmp))))                           \
        goto fail;                                                         \
    ret->refs = tmp;                                                       \
                                                                           \
    for (i = 0; i < a->refcount; i ++) {                                   \
        ret->refs[ret->refcount] = a->refs[i];                             \
        *ret->refs[ret->refcount++] = ret;                                 \
    }                                                                      \
                                                                           \
    av_freep(&a->refs);                                                    \
    av_freep(&a->fmts);                                                    \
    av_freep(&a);                                                          \
} while (0)

Global merging: query

/**
 * Perform one round of query_formats() and merging formats lists on the
 * filter graph.
 * @return  >=0 if all links formats lists could be queried and merged;
 */
static int query_formats(AVFilterGraph *graph, AVClass *log_ctx)
{
    int i, j, ret;
    int scaler_count = 0, resampler_count = 0;
    int count_queried = 0;        /* successful calls to query_formats() */
    int count_merged = 0;         /* successful merge of formats lists */
    int count_already_merged = 0; /* lists already merged */
    int count_delayed = 0;        /* lists that need to be merged later */

    for (i = 0; i < graph->nb_filters; i++) {
        AVFilterContext *f = graph->filters[i];
        if (formats_declared(f))
            continue;
        if (f->filter->query_formats)
            ret = filter_query_formats(f);
        else
            ret = ff_default_query_formats(f);
        if (ret < 0 && ret != AVERROR(EAGAIN))
            return ret;
        /* note: EAGAIN could indicate a partial success, not counted yet */
        count_queried += ret >= 0;
    }

Global merging: merging

    /* go through and merge as many format lists as possible */
    for (i = 0; i < graph->nb_filters; i++) {
        AVFilterContext *filter = graph->filters[i];

        for (j = 0; j < filter->nb_inputs; j++) {
            AVFilterLink *link = filter->inputs[j];
            int convert_needed = 0;

            if (!link)
                continue;

                if (!ff_merge_formats(link->in_formats, link->out_formats,
                                      link->type))
                    convert_needed = 1;

Global merging: automatic conversion

    if (convert_needed) {
        AVFilterContext *convert;
        AVFilter *filter;
        AVFilterLink *inlink, *outlink;
        char scale_args[256];
        char inst_name[30];

        /* couldn't merge format lists. auto-insert conversion filter */
        switch (link->type) {
        case AVMEDIA_TYPE_VIDEO:
            if (!(filter = avfilter_get_by_name("scale"))) {
                av_log(log_ctx, AV_LOG_ERROR, "'scale' filter "
                       "not present, cannot convert pixel formats.\n");
                return AVERROR(EINVAL);
            }

            snprintf(inst_name, sizeof(inst_name), "auto-inserted scaler %d",
                     scaler_count++);

            if ((ret = avfilter_graph_create_filter(&convert, filter,
                                                    inst_name, graph->scale_sws_opts, NULL,
                                                    graph)) < 0)
                return ret;
            break;
        }

        if ((ret = avfilter_insert_filter(link, convert, 0, 0)) < 0)
            return ret;

Global merging: keep only one

/**
 * Reduce the formats list of a link to a single format and store it in
 * link->format, then release the link's format lists.
 *
 * @param link link to settle; a NULL link or a link without in_formats
 *             (for instance one that was already settled) is left alone
 * @param ref  optional reference link whose format is already chosen; for
 *             video links the candidate closest to ref->format is
 *             preferred. May be NULL, in which case the first entry of
 *             the list is used.
 * @return 0
 */
static int pick_format(AVFilterLink *link, AVFilterLink *ref)
{
    /* Callers run this on every link of the graph, including links whose
     * lists were released by an earlier call: nothing left to pick then. */
    if (!link || !link->in_formats)
        return 0;

    if (link->type == AVMEDIA_TYPE_VIDEO) {
        if (ref && ref->type == AVMEDIA_TYPE_VIDEO) {
            /* Heuristic: an even number of components is treated as
             * "the reference format has an alpha channel". */
            int has_alpha = av_pix_fmt_desc_get(ref->format)->nb_components % 2 == 0;
            enum AVPixelFormat best = AV_PIX_FMT_NONE;
            int i;

            for (i = 0; i < link->in_formats->nb_formats; i++) {
                enum AVPixelFormat p = link->in_formats->formats[i];
                best = av_find_best_pix_fmt_of_2(best, p, ref->format, has_alpha, NULL);
            }
            av_log(link->src, AV_LOG_DEBUG, "picking %s out of %d ref:%s alpha:%d\n",
                   av_get_pix_fmt_name(best), link->in_formats->nb_formats,
                   av_get_pix_fmt_name(ref->format), has_alpha);
            /* The selected format goes first; the list is cut to it below. */
            link->in_formats->formats[0] = best;
        }
    }

    link->in_formats->nb_formats = 1;
    link->format = link->in_formats->formats[0];

    ff_formats_unref(&link->in_formats);
    ff_formats_unref(&link->out_formats);
    return 0;
}

ref: reference link, select most similar (alpha, colorspace)

Global merging: choose in order

First: inputs with only one possible format

    for (j = 0; j < filter->nb_inputs; j++){
        if(filter->inputs[j]->in_formats && filter->inputs[j]->in_formats->nb_formats == 1) {
            if ((ret = pick_format(filter->inputs[j], NULL)) < 0)
                return ret;
            change = 1;

Second: ditto for outputs

    for (j = 0; j < filter->nb_outputs; j++){
        if(filter->outputs[j]->in_formats && filter->outputs[j]->in_formats->nb_formats == 1) {
            if ((ret = pick_format(filter->outputs[j], NULL)) < 0)
                return ret;
            change = 1;

Third: outputs where first input already done: use as ref

if (filter->nb_inputs && filter->nb_outputs && filter->inputs[0]->format>=0) {
    for (j = 0; j < filter->nb_outputs; j++) {
        if(filter->outputs[j]->format<0) {
            if ((ret = pick_format(filter->outputs[j], filter->inputs[0])) < 0)
                return ret;
            change = 1;

Loop while progress is made and expedite the rest

    }while(change);
    for (i = 0; i < graph->nb_filters; i++) {
        AVFilterContext *filter = graph->filters[i];
            if ((ret = pick_format(filter->inputs[j], NULL)) < 0)

Final: configure the filters

int avfilter_config_links(AVFilterContext *filter)
{
        switch (link->init_state) {
        case AVLINK_INIT:
            continue;
        case AVLINK_STARTINIT:
            av_log(filter, AV_LOG_INFO, "circular filter chain detected\n");
            return 0;
        case AVLINK_UNINIT:
            link->init_state = AVLINK_STARTINIT;

            if ((ret = avfilter_config_links(link->src)) < 0)
                return ret;

            if (!(config_link = link->srcpad->config_props)) {
                if (link->src->nb_inputs != 1) {
                    return AVERROR(EINVAL);
                }
            } else if ((ret = config_link(link)) < 0) {
                av_log(link->src, AV_LOG_ERROR,
                       "Failed to configure output pad on %s\n",
                       link->src->name);
                return ret;
            }
            link->init_state = AVLINK_INIT;

Recursive to configure inputs before outputs

And voilà!


Finished? Not so fast!


Sample rates

Most filters accept any sample rate; AVFilterFormats is optimized for small lists. Hack: treat empty lists as full lists.

/**
 * Merge two sample rate lists.
 *
 * An empty list stands for "any sample rate": if both lists are non-empty
 * the result is their intersection, otherwise the non-empty list (or one
 * of the two empty lists) absorbs the references of the other.
 *
 * On success the lists that were merged away are freed and every pointer
 * that referred to them now refers to the returned list.
 *
 * @param a first list
 * @param b second list
 * @return the merged list, or NULL if the lists have no common rate or an
 *         allocation failed
 */
AVFilterFormats *ff_merge_samplerates(AVFilterFormats *a,
                                      AVFilterFormats *b)
{
    AVFilterFormats *ret = NULL;

    /* Merging a list with itself must not free the list being returned. */
    if (a == b)
        return a;

    if (a->nb_formats && b->nb_formats) {
        MERGE_FORMATS(ret, a, b, formats, nb_formats, AVFilterFormats, fail);
    } else if (a->nb_formats) {
        MERGE_REF(a, b, formats, AVFilterFormats, fail);
        ret = a;
    } else {
        MERGE_REF(b, a, formats, AVFilterFormats, fail);
        ret = b;
    }
    return ret;

fail:
    /* ret is only non-NULL here when MERGE_FORMATS allocated it. */
    if (ret) {
        av_freep(&ret->refs);
        av_freep(&ret->formats);
    }
    av_freep(&ret);
    return NULL;
}

#define REDUCE_FORMATS(fmt_type, list_type, list, var, nb, add_format) \
                                                                       \
            if (!out_link->in_ ## list->nb) {                          \
                add_format(&out_link->in_ ##list, fmt);                \
                ret = 1;                                               \
                break;                                                 \
            }                                                          \

Channel layouts


Extra constraint: keep code differences with the libav fork minimal.

AVFilterChannelLayouts

/**
 * A list of supported channel layouts.
 *
 * The list works the same as AVFilterFormats, except for the following
 * differences:
 * - A list with all_layouts = 1 means all channel layouts with a known
 *   disposition; nb_channel_layouts must then be 0.
 * - A list with all_counts = 1 means all channel counts, with a known or
 *   unknown disposition; nb_channel_layouts must then be 0 and all_layouts 1.
 * - The list must not contain a layout with a known disposition and a
 *   channel count with unknown disposition with the same number of channels
 *   (e.g. AV_CH_LAYOUT_STEREO and FF_COUNT2LAYOUT(2)).
 */
typedef struct AVFilterChannelLayouts {
    uint64_t *channel_layouts;  ///< list of channel layouts
    int    nb_channel_layouts;  ///< number of channel layouts
    char all_layouts;           ///< accept any known channel layout
    char all_counts;            ///< accept any channel layout or count

    unsigned refcount;          ///< number of references to this list
    struct AVFilterChannelLayouts ***refs; ///< references to this list
} AVFilterChannelLayouts;

/*
 * Encode a bare channel count as a pseudo-layout and decode it again.
 * Bit 63 marks the value as a count; the count itself sits in the low
 * bits. FF_LAYOUT2COUNT() yields 0 for values that do not carry the mark.
 */
#define FF_COUNT2LAYOUT(c) ((1ULL << 63) | (c))
#define FF_LAYOUT2COUNT(l) \
    (((l) & (1ULL << 63)) ? (int)((l) & 0x7FFFFFFF) : 0)

ff_merge_channel_layouts()

AVFilterChannelLayouts *ff_merge_channel_layouts(AVFilterChannelLayouts *a,
                                                 AVFilterChannelLayouts *b)
{
    /* a[known] intersect b[known] */
    for (i = 0; i < a->nb_channel_layouts; i++) {
        if (!KNOWN(a->channel_layouts[i]))
            continue;
        for (j = 0; j < b->nb_channel_layouts; j++) {
            if (a->channel_layouts[i] == b->channel_layouts[j]) {
                ret->channel_layouts[ret_nb++] = a->channel_layouts[i];
                a->channel_layouts[i] = b->channel_layouts[j] = 0;
            }
        }
    }
    /* 1st round: a[known] intersect b[generic]
       2nd round: a[generic] intersect b[known] */
    for (round = 0; round < 2; round++) {
        for (i = 0; i < a->nb_channel_layouts; i++) {
            uint64_t fmt = a->channel_layouts[i], bfmt;
            if (!fmt || !KNOWN(fmt))
                continue;
            bfmt = FF_COUNT2LAYOUT(av_get_channel_layout_nb_channels(fmt));
            for (j = 0; j < b->nb_channel_layouts; j++)
                if (b->channel_layouts[j] == bfmt)
                    ret->channel_layouts[ret_nb++] = a->channel_layouts[i];
        }
        /* 1st round: swap to prepare 2nd round; 2nd round: put it back */
        FFSWAP(AVFilterChannelLayouts *, a, b);
    }

Avoid unnecessary audio conversions

/**
 * Propagate a settled input property to the outputs of a filter, to avoid
 * needless conversions.
 *
 * fmt_type   type of one entry (type of the local variable fmt)
 * list_type  structure type of the lists
 * list       suffix of the link fields: out_<list> is read on the inputs,
 *            in_<list> on the outputs
 * var        name of the entries array inside the list
 * nb         name of the entry count field inside the list
 * add_format not used by this version of the macro
 *
 * For every input link whose out_<list> holds exactly one entry, each
 * output link of the same media type whose in_<list> does not already
 * hold exactly one entry is searched for that entry; when found, the
 * output list is cut down to that single entry and ret is set to 1.
 *
 * The macro relies on filter, i, j, k and ret from the enclosing scope.
 * NOTE(review): out_link->in_<list> is dereferenced without a NULL check,
 * unlike link->out_<list> -- confirm it is always set at this point.
 */
#define REDUCE_FORMATS(fmt_type, list_type, list, var, nb, add_format) \
do {                                                                   \
    for (i = 0; i < filter->nb_inputs; i++) {                          \
        AVFilterLink *link = filter->inputs[i];                        \
        fmt_type fmt;                                                  \
        if (!link->out_ ## list || link->out_ ## list->nb != 1)        \
            continue;                                                  \
        fmt = link->out_ ## list->var[0];                              \
                                                                       \
        for (j = 0; j < filter->nb_outputs; j++) {                     \
            AVFilterLink *out_link = filter->outputs[j];               \
            list_type *fmts;                                           \
                                                                       \
            if (link->type != out_link->type ||                        \
                out_link->in_ ## list->nb == 1)                        \
                continue;                                              \
            fmts = out_link->in_ ## list;                              \
            for (k = 0; k < out_link->in_ ## list->nb; k++)            \
                if (fmts->var[k] == fmt) {                             \
                    fmts->var[0]  = fmt;                               \
                    fmts->nb = 1;                                      \
                    ret = 1;                                           \
                    break;                                             \
                }                                                      \
        }                                                              \
    }                                                                  \
} while (0)

Useful, e.g., when the sample formats are incompatible but the sample rates are compatible.

Phantom constraints


            if (link->in_formats != link->out_formats
                && link->in_formats && link->out_formats)
                if (!can_merge_formats(link->in_formats, link->out_formats,
                                      link->type, 0))
                    convert_needed = 1;
            if (link->type == AVMEDIA_TYPE_AUDIO) {
                if (link->in_samplerates != link->out_samplerates
                    && link->in_samplerates && link->out_samplerates)
                    if (!can_merge_formats(link->in_samplerates,
                                           link->out_samplerates,
                                           0, 1))
                        convert_needed = 1;
            }

can_merge_formats() makes a copy of the list without refs.

Poor format choices

Example: { yuv420p, gray8 } ∩ { rgb24, gray8 } = { gray8 }.

Better convert yuv420p → rgb24 than select gray8 and lose color.


    /* Do not lose chroma or alpha in merging.
       It happens if both lists have formats with chroma (resp. alpha), but
       the only formats in common do not have it (e.g. YUV+gray vs.
       RGB+gray): in that case, the merging would select the gray format,
       possibly causing a lossy conversion elsewhere in the graph.
       To avoid that, pretend that there are no common formats to force the
       insertion of a conversion filter. */
    if (type == AVMEDIA_TYPE_VIDEO)
        for (i = 0; i < a->nb_formats; i++)
            for (j = 0; j < b->nb_formats; j++) {
                const AVPixFmtDescriptor *adesc = av_pix_fmt_desc_get(a->formats[i]);
                const AVPixFmtDescriptor *bdesc = av_pix_fmt_desc_get(b->formats[j]);
                alpha2 |= adesc->flags & bdesc->flags & AV_PIX_FMT_FLAG_ALPHA;
                chroma2|= adesc->nb_components > 1 && bdesc->nb_components > 1;
                if (a->formats[i] == b->formats[j]) {
                    alpha1 |= adesc->flags & AV_PIX_FMT_FLAG_ALPHA;
                    chroma1|= adesc->nb_components > 1;
                }
            }
    // If chroma or alpha can be lost through merging then do not merge
    if (alpha2 > alpha1 || chroma2 > chroma1)
        return NULL;

Complex constraints

    while ((ret = query_formats(graph, log_ctx)) == AVERROR(EAGAIN))
        av_log(graph, AV_LOG_DEBUG, "query_formats not finished\n");

        if (count_queried || count_merged)
            return AVERROR(EAGAIN);
        av_log(graph, AV_LOG_ERROR,
               "The following filters could not choose their formats: %s\n"
               "Consider inserting the (a)format filter near their input or "
               "output.\n", bp.str);

Example: amerge: out layout = union of in layouts

        if (!ctx->inputs[i]->in_channel_layouts ||
            !ctx->inputs[i]->in_channel_layouts->nb_channel_layouts) {
            av_log(ctx, AV_LOG_WARNING,
                   "No channel layout for input %d\n", i + 1);
            return AVERROR(EAGAIN);
        }
        inlayout[i] = ctx->inputs[i]->in_channel_layouts->channel_layouts[0];

Problem: fragile, does not work when many filters need it.

Future directions



More robust system for complex constraints


Partial graph reconfiguration