Libraries and programs to decode / encode / convert / modify multimedia files and data.
Component of FFmpeg for non-linear filtering.
All source code from FFmpeg 2.8.
187 formats for now
enum AVPixelFormat { AV_PIX_FMT_NONE = -1, AV_PIX_FMT_YUV420P, ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples) AV_PIX_FMT_YUYV422, ///< packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr AV_PIX_FMT_RGB24, ///< packed RGB 8:8:8, 24bpp, RGBRGB... AV_PIX_FMT_BGR24, ///< packed RGB 8:8:8, 24bpp, BGRBGR... AV_PIX_FMT_YUV422P, ///< planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples) AV_PIX_FMT_YUV444P, ///< planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples) AV_PIX_FMT_ARGB, ///< packed ARGB 8:8:8:8, 32bpp, ARGBARGB... AV_PIX_FMT_RGBA, ///< packed RGBA 8:8:8:8, 32bpp, RGBARGBA... AV_PIX_FMT_ABGR, ///< packed ABGR 8:8:8:8, 32bpp, ABGRABGR... AV_PIX_FMT_BGRA, ///< packed BGRA 8:8:8:8, 32bpp, BGRABGRA... AV_PIX_FMT_RGB48BE, ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as big-endian AV_PIX_FMT_RGB48LE, ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as little-endian AV_PIX_FMT_VDPAU, ///< HW acceleration through VDPAU, Picture.data[3] contains a VdpVideoSurface AV_PIX_FMT_VDA, ///< HW acceleration through VDA, data[3] contains a CVPixelBufferRef
Sample formats: depth, packed or planar, 10 formats
enum AVSampleFormat { AV_SAMPLE_FMT_NONE = -1, AV_SAMPLE_FMT_U8, ///< unsigned 8 bits AV_SAMPLE_FMT_S16, ///< signed 16 bits AV_SAMPLE_FMT_S32, ///< signed 32 bits AV_SAMPLE_FMT_FLT, ///< float AV_SAMPLE_FMT_DBL, ///< double AV_SAMPLE_FMT_U8P, ///< unsigned 8 bits, planar
Channel layouts: known or unknown
/* Individual channels: each one is a distinct bit. */
#define AV_CH_FRONT_LEFT             0x00000001
#define AV_CH_FRONT_RIGHT            0x00000002
#define AV_CH_FRONT_CENTER           0x00000004

/*
 * Layouts: bitwise OR of the channels they contain.
 * AV_CH_LOW_FREQUENCY and AV_CH_BACK_CENTER are not part of this excerpt.
 */
#define AV_CH_LAYOUT_STEREO          (AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT)
#define AV_CH_LAYOUT_2POINT1         (AV_CH_LAYOUT_STEREO|AV_CH_LOW_FREQUENCY)
#define AV_CH_LAYOUT_2_1             (AV_CH_LAYOUT_STEREO|AV_CH_BACK_CENTER)
Sample frequency: fixed or arbitrary value
Decoders: output the most convenient format
Encoders: list of supported formats
typedef struct AVCodec { const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0} const enum AVPixelFormat *pix_fmts; ///< array of supported pixel formats, or NULL if unknown, array is terminated by -1 const int *supported_samplerates; ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0 const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1 const uint64_t *channel_layouts; ///< array of support channel layouts, or NULL if unknown. array is terminated by 0
.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE }, .channel_layouts = ff_vorbis_channel_layouts, .supported_samplerates = libopus_sample_rates,
Several input/outputs: consistency constraints
Implement all formats in all filters: too much work, useless
Use generic code for all formats: too complex, not really possible
Convert to a universal format: too inefficient
select a format that works for all filters
if not possible, insert conversion filters at selected places
examples: delogo, curves, crop
examples: scale, aresample
layout = union of in layouts
Both inputs must be similar except for alpha;
output identical to main input
Many in formats;
out format must have similar bit depth
/**
 * A list of media formats, plus the bookkeeping needed to share one list
 * between several owners.
 *
 * refs holds refcount entries; each entry is the address of an
 * AVFilterFormats pointer that refers to this list.
 */
struct AVFilterFormats {
    unsigned nb_formats;            ///< number of formats
    int *formats;                   ///< list of media formats

    unsigned refcount;              ///< number of references to this list
    struct AVFilterFormats ***refs; ///< references to this list
};
/** * Query formats supported by the filter on its inputs and outputs. * * This callback is called after the filter is initialized (so the inputs * and outputs are fixed), shortly before the format negotiation. This * callback may be called more than once. */ int (*query_formats)(AVFilterContext *);
/**
 * Declare the pixel formats accepted on each pad of the filter.
 *
 * Input 0 and output 0 reference the same list (main_fmts, all of which
 * carry an alpha channel by name); input 1 gets its own list containing
 * only AV_PIX_FMT_GRAY8.
 *
 * @param ctx filter instance; must have at least two inputs and one output
 * @return 0 on success, a negative AVERROR code if a list could not be
 *         allocated or attached to a link
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat main_fmts[] = {
        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA420P,
        AV_PIX_FMT_RGBA, AV_PIX_FMT_BGRA, AV_PIX_FMT_ARGB, AV_PIX_FMT_ABGR,
        AV_PIX_FMT_NONE
    };
    static const enum AVPixelFormat alpha_fmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE };
    AVFilterFormats *main_formats  = ff_make_format_list(main_fmts);
    AVFilterFormats *alpha_formats = ff_make_format_list(alpha_fmts);
    int ret;

    /* A NULL list means the allocation failed; do not hand it to
     * ff_formats_ref(), which dereferences its first argument. */
    if (!main_formats || !alpha_formats) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    if ((ret = ff_formats_ref(main_formats,  &ctx->inputs[0]->out_formats)) < 0 ||
        (ret = ff_formats_ref(alpha_formats, &ctx->inputs[1]->out_formats)) < 0 ||
        (ret = ff_formats_ref(main_formats,  &ctx->outputs[0]->in_formats)) < 0)
        goto fail;

    return 0;

fail:
    /* A list that a link already references is left alone; only lists that
     * nobody references yet are released here. */
    if (main_formats && !main_formats->refcount) {
        av_freep(&main_formats->formats);
        av_freep(&main_formats->refs);
        av_freep(&main_formats);
    }
    if (alpha_formats && !alpha_formats->refcount) {
        av_freep(&alpha_formats->formats);
        av_freep(&alpha_formats->refs);
        av_freep(&alpha_formats);
    }
    return ret;
}
#define FORMATS_REF(f, ref) \ void *tmp; \ \ if (!ref) \ return AVERROR_BUG; \ \ tmp = av_realloc_array(f->refs, sizeof(*f->refs), f->refcount + 1); \ if (!tmp) \ return AVERROR(ENOMEM); \ f->refs = tmp; \ f->refs[f->refcount++] = ref; \ *ref = f; \ return 0 int ff_channel_layouts_ref(AVFilterChannelLayouts *f, AVFilterChannelLayouts **ref) { FORMATS_REF(f, ref); } int ff_formats_ref(AVFilterFormats *f, AVFilterFormats **ref) { FORMATS_REF(f, ref); }
scale: in and out formats are independent
vstack: all formats must be the same
/**
 * Function body that attaches one list (fmts) to every link of ctx that
 * does not have a list yet.
 *
 * - ctx:      filter whose inputs and outputs are walked
 * - fmts:     list to attach
 * - in_fmts:  name of the link field used on the filter's outputs
 * - out_fmts: name of the link field used on the filter's inputs
 * - ref:      function used to attach the list (returns < 0 on error)
 * - list:     name of the array member inside fmts
 *
 * Links whose field is already set are skipped. If no link took the list,
 * the list is freed. Expands to declarations and return statements, so it
 * must be the entire body of a function returning int: AVERROR_BUG when
 * fmts is NULL, the error from ref() if one occurs, 0 otherwise.
 */
#define SET_COMMON_FORMATS(ctx, fmts, in_fmts, out_fmts, ref, list) \
    int count = 0, i; \
    \
    if (!fmts) \
        return AVERROR_BUG; \
    \
    for (i = 0; i < ctx->nb_inputs; i++) { \
        if (ctx->inputs[i] && !ctx->inputs[i]->out_fmts) { \
            int ret = ref(fmts, &ctx->inputs[i]->out_fmts); \
            if (ret < 0) \
                return ret; \
            count++; \
        } \
    } \
    for (i = 0; i < ctx->nb_outputs; i++) { \
        if (ctx->outputs[i] && !ctx->outputs[i]->in_fmts) { \
            int ret = ref(fmts, &ctx->outputs[i]->in_fmts); \
            if (ret < 0) \
                return ret; \
            count++; \
        } \
    } \
    \
    /* nobody took a reference: the list would otherwise be lost */ \
    if (!count) { \
        av_freep(&fmts->list); \
        av_freep(&fmts->refs); \
        av_freep(&fmts); \
    } \
    \
    return 0;
/**
 * Build in ret the intersection of lists a and b, then move every reference
 * held on a and b over to ret (see MERGE_REF).
 *
 * - ret:  lvalue receiving the newly allocated list
 * - a, b: lists to intersect; both are freed by MERGE_REF on success
 * - fmts: name of the array member, nb: name of its element-count member
 * - type: list struct type, forwarded to MERGE_REF
 * - fail: label jumped to on allocation failure or empty intersection
 *
 * If more matches are found than the shorter list has entries, an error is
 * logged, ret is freed and the enclosing function returns NULL directly.
 */
#define MERGE_FORMATS(ret, a, b, fmts, nb, type, fail) \
do { \
    int i, j, k = 0, count = FFMIN(a->nb, b->nb); \
    \
    if (!(ret = av_mallocz(sizeof(*ret)))) \
        goto fail; \
    \
    if (count) { \
        if (!(ret->fmts = av_malloc_array(count, sizeof(*ret->fmts)))) \
            goto fail; \
        for (i = 0; i < a->nb; i++) \
            for (j = 0; j < b->nb; j++) \
                if (a->fmts[i] == b->fmts[j]) { \
                    /* the result array only has room for count entries */ \
                    if(k >= FFMIN(a->nb, b->nb)){ \
                        av_log(NULL, AV_LOG_ERROR, "Duplicate formats in avfilter_merge_formats() detected\n"); \
                        av_free(ret->fmts); \
                        av_free(ret); \
                        return NULL; \
                    } \
                    ret->fmts[k++] = a->fmts[i]; \
                } \
    } \
    ret->nb = k; \
    /* check that there was at least one common format */ \
    if (!ret->nb) \
        goto fail; \
    \
    MERGE_REF(ret, a, fmts, type, fail); \
    MERGE_REF(ret, b, fmts, type, fail); \
} while (0)
/**
 * Transfer every reference held on list a to list ret, then free a.
 *
 * - ret:  list that takes over the references
 * - a:    list being absorbed; its refs array, its fmts array and the list
 *         itself are freed, and a is set to NULL by av_freep()
 * - fmts: name of the array member to free in a
 * - type: list struct type (used to declare the temporary refs pointer)
 * - fail: label jumped to if the refs array of ret cannot be grown
 *
 * Each pointer recorded in a->refs is rewritten to point at ret.
 */
#define MERGE_REF(ret, a, fmts, type, fail) \
do { \
    type ***tmp; \
    int i; \
    \
    if (!(tmp = av_realloc_array(ret->refs, ret->refcount + a->refcount, \
                                 sizeof(*tmp)))) \
        goto fail; \
    ret->refs = tmp; \
    \
    for (i = 0; i < a->refcount; i ++) { \
        ret->refs[ret->refcount] = a->refs[i]; \
        *ret->refs[ret->refcount++] = ret; \
    } \
    \
    av_freep(&a->refs); \
    av_freep(&a->fmts); \
    av_freep(&a); \
} while (0)
/** * Perform one round of query_formats() and merging formats lists on the * filter graph. * @return >=0 if all links formats lists could be queried and merged; */ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx) { int i, j, ret; int scaler_count = 0, resampler_count = 0; int count_queried = 0; /* successful calls to query_formats() */ int count_merged = 0; /* successful merge of formats lists */ int count_already_merged = 0; /* lists already merged */ int count_delayed = 0; /* lists that need to be merged later */ for (i = 0; i < graph->nb_filters; i++) { AVFilterContext *f = graph->filters[i]; if (formats_declared(f)) continue; if (f->filter->query_formats) ret = filter_query_formats(f); else ret = ff_default_query_formats(f); if (ret < 0 && ret != AVERROR(EAGAIN)) return ret; /* note: EAGAIN could indicate a partial success, not counted yet */ count_queried += ret >= 0; }
/* go through and merge as many format lists as possible */ for (i = 0; i < graph->nb_filters; i++) { AVFilterContext *filter = graph->filters[i]; for (j = 0; j < filter->nb_inputs; j++) { AVFilterLink *link = filter->inputs[j]; int convert_needed = 0; if (!link) continue; if (!ff_merge_formats(link->in_formats, link->out_formats, link->type)) convert_needed = 1;
if (convert_needed) { AVFilterContext *convert; AVFilter *filter; AVFilterLink *inlink, *outlink; char scale_args[256]; char inst_name[30]; /* couldn't merge format lists. auto-insert conversion filter */ switch (link->type) { case AVMEDIA_TYPE_VIDEO: if (!(filter = avfilter_get_by_name("scale"))) { av_log(log_ctx, AV_LOG_ERROR, "'scale' filter " "not present, cannot convert pixel formats.\n"); return AVERROR(EINVAL); } snprintf(inst_name, sizeof(inst_name), "auto-inserted scaler %d", scaler_count++); if ((ret = avfilter_graph_create_filter(&convert, filter, inst_name, graph->scale_sws_opts, NULL, graph)) < 0) return ret; break; } if ((ret = avfilter_insert_filter(link, convert, 0, 0)) < 0) return ret;
/**
 * Settle the format of a link by reducing its format list to one entry.
 *
 * When both link and ref are video links, every candidate in
 * link->in_formats is compared against ref->format with
 * av_find_best_pix_fmt_of_2() and the winner is selected; the reference is
 * considered to carry alpha when its descriptor has an even number of
 * components. In all other cases the first entry of the list is kept.
 * The selected value is stored in link->format and both format lists of
 * the link are unreferenced.
 *
 * @param link link to settle; a NULL link or a link without in_formats
 *             is left untouched
 * @param ref  optional reference link, may be NULL
 * @return 0
 */
static int pick_format(AVFilterLink *link, AVFilterLink *ref)
{
    /* This function unrefs in_formats once a format has been picked, so a
     * later call on the same link has nothing left to choose from. */
    if (!link || !link->in_formats)
        return 0;

    if (link->type == AVMEDIA_TYPE_VIDEO &&
        ref && ref->type == AVMEDIA_TYPE_VIDEO) {
        const AVPixFmtDescriptor *ref_desc = av_pix_fmt_desc_get(ref->format);

        /* Without a descriptor for the reference format there is nothing
         * to compare against: fall back to the first list entry. */
        if (ref_desc) {
            int has_alpha = ref_desc->nb_components % 2 == 0;
            enum AVPixelFormat best = AV_PIX_FMT_NONE;
            int i;

            for (i = 0; i < link->in_formats->nb_formats; i++) {
                enum AVPixelFormat p = link->in_formats->formats[i];
                best = av_find_best_pix_fmt_of_2(best, p, ref->format, has_alpha, NULL);
            }
            av_log(link->src, AV_LOG_DEBUG, "picking %s out of %d ref:%s alpha:%d\n",
                   av_get_pix_fmt_name(best), link->in_formats->nb_formats,
                   av_get_pix_fmt_name(ref->format), has_alpha);
            link->in_formats->formats[0] = best;
        }
    }

    link->in_formats->nb_formats = 1;
    link->format = link->in_formats->formats[0];

    ff_formats_unref(&link->in_formats);
    ff_formats_unref(&link->out_formats);

    return 0;
}
ref: reference link, select most similar (alpha, colorspace)
First: inputs with only one possible format
for (j = 0; j < filter->nb_inputs; j++){ if(filter->inputs[j]->in_formats && filter->inputs[j]->in_formats->nb_formats == 1) { if ((ret = pick_format(filter->inputs[j], NULL)) < 0) return ret; change = 1;
Second: ditto for outputs
for (j = 0; j < filter->nb_outputs; j++){ if(filter->outputs[j]->in_formats && filter->outputs[j]->in_formats->nb_formats == 1) { if ((ret = pick_format(filter->outputs[j], NULL)) < 0) return ret; change = 1;
Third: outputs where first input already done: use as ref
if (filter->nb_inputs && filter->nb_outputs && filter->inputs[0]->format>=0) { for (j = 0; j < filter->nb_outputs; j++) { if(filter->outputs[j]->format<0) { if ((ret = pick_format(filter->outputs[j], filter->inputs[0])) < 0) return ret; change = 1;
Loop while progress is made and expedite the rest
}while(change); for (i = 0; i < graph->nb_filters; i++) { AVFilterContext *filter = graph->filters[i]; if ((ret = pick_format(filter->inputs[j], NULL)) < 0)
int avfilter_config_links(AVFilterContext *filter) { switch (link->init_state) { case AVLINK_INIT: continue; case AVLINK_STARTINIT: av_log(filter, AV_LOG_INFO, "circular filter chain detected\n"); return 0; case AVLINK_UNINIT: link->init_state = AVLINK_STARTINIT; if ((ret = avfilter_config_links(link->src)) < 0) return ret; if (!(config_link = link->srcpad->config_props)) { if (link->src->nb_inputs != 1) { return AVERROR(EINVAL); } } else if ((ret = config_link(link)) < 0) { av_log(link->src, AV_LOG_ERROR, "Failed to configure output pad on %s\n", link->src->name); return ret; } link->init_state = AVLINK_INIT;
Recursive to configure inputs before outputs
Finished? Not so fast!
Most filters accept any sample rate; AVFilterFormats is optimized for small lists. Hack: treat empty lists as full lists.
/**
 * Merge two sample rate lists into one and move all references to it.
 *
 * An empty list (nb_formats == 0) stands for "any sample rate":
 * - both lists non-empty: the result is their intersection;
 * - only one non-empty:   that list is the result and absorbs the
 *                         references of the empty one;
 * - both empty:           b is the result and absorbs the references of a.
 *
 * @param a first list, must not be NULL
 * @param b second list, must not be NULL
 * @return the merged list (a itself if a == b), or NULL on allocation
 *         failure or when two non-empty lists have no entry in common
 */
AVFilterFormats *ff_merge_samplerates(AVFilterFormats *a, AVFilterFormats *b)
{
    AVFilterFormats *ret = NULL;

    /* Merging a list with itself would free it while it is still in use. */
    if (a == b)
        return a;

    if (a->nb_formats && b->nb_formats) {
        MERGE_FORMATS(ret, a, b, formats, nb_formats, AVFilterFormats, fail);
    } else if (a->nb_formats) {
        MERGE_REF(a, b, formats, AVFilterFormats, fail);
        ret = a;
    } else {
        MERGE_REF(b, a, formats, AVFilterFormats, fail);
        ret = b;
    }

    return ret;

fail:
    /* ret is only non-NULL here when MERGE_FORMATS allocated it. */
    if (ret) {
        av_freep(&ret->refs);
        av_freep(&ret->formats);
    }
    av_freep(&ret);
    return NULL;
}
#define REDUCE_FORMATS(fmt_type, list_type, list, var, nb, add_format) \ \ if (!out_link->in_ ## list->nb) { \ add_format(&out_link->in_ ##list, fmt); \ ret = 1; \ break; \ } \
/*
 * Layouts with a known disposition are bitwise ORs of channel bits.
 * AV_CH_LAYOUT_5POINT0_BACK and AV_CH_LOW_FREQUENCY are not part of this
 * excerpt.
 */
#define AV_CH_LAYOUT_STEREO          (AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT)
#define AV_CH_LAYOUT_5POINT1_BACK    (AV_CH_LAYOUT_5POINT0_BACK|AV_CH_LOW_FREQUENCY)
struct AVFilterLink { uint64_t channel_layout; ///< channel layout of current buffer (see libavutil/channel_layout.h) int channels;
av_assert0(channels == av_get_channel_layout_nb_channels(link->channel_layout) || !av_get_channel_layout_nb_channels(link->channel_layout));
Extra constraint: keep code differences with the libav fork minimal.
/**
 * A list of supported channel layouts.
 *
 * The list works the same as AVFilterFormats, except for the following
 * differences:
 * - A list with all_layouts = 1 means all channel layouts with a known
 *   disposition; nb_channel_layouts must then be 0.
 * - A list with all_counts = 1 means all channel counts, with a known or
 *   unknown disposition; nb_channel_layouts must then be 0 and all_layouts 1.
 * - The list must not contain a layout with a known disposition and a
 *   channel count with unknown disposition with the same number of channels
 *   (e.g. AV_CH_LAYOUT_STEREO and FF_COUNT2LAYOUT(2)).
 */
typedef struct AVFilterChannelLayouts {
    uint64_t *channel_layouts;  ///< list of channel layouts
    int    nb_channel_layouts;  ///< number of channel layouts
    char all_layouts;           ///< accept any known channel layout
    char all_counts;            ///< accept any channel layout or count

    unsigned refcount;          ///< number of references to this list
    struct AVFilterChannelLayouts ***refs; ///< references to this list
} AVFilterChannelLayouts;

/**
 * Encode a channel count as a layout value: bit 63 is set and the count is
 * ORed into the low bits.
 */
#define FF_COUNT2LAYOUT(c) (0x8000000000000000ULL | (c))

/**
 * Decode a value produced by FF_COUNT2LAYOUT(): yields the low 31 bits as an
 * int when bit 63 is set, 0 otherwise.
 */
#define FF_LAYOUT2COUNT(l) (((l) & 0x8000000000000000ULL) ? \
                           (int)((l) & 0x7FFFFFFF) : 0)
AVFilterChannelLayouts *ff_merge_channel_layouts(AVFilterChannelLayouts *a, AVFilterChannelLayouts *b) { /* a[known] intersect b[known] */ for (i = 0; i < a->nb_channel_layouts; i++) { if (!KNOWN(a->channel_layouts[i])) continue; for (j = 0; j < b->nb_channel_layouts; j++) { if (a->channel_layouts[i] == b->channel_layouts[j]) { ret->channel_layouts[ret_nb++] = a->channel_layouts[i]; a->channel_layouts[i] = b->channel_layouts[j] = 0; } } } /* 1st round: a[known] intersect b[generic] 2nd round: a[generic] intersect b[known] */ for (round = 0; round < 2; round++) { for (i = 0; i < a->nb_channel_layouts; i++) { uint64_t fmt = a->channel_layouts[i], bfmt; if (!fmt || !KNOWN(fmt)) continue; bfmt = FF_COUNT2LAYOUT(av_get_channel_layout_nb_channels(fmt)); for (j = 0; j < b->nb_channel_layouts; j++) if (b->channel_layouts[j] == bfmt) ret->channel_layouts[ret_nb++] = a->channel_layouts[i]; } /* 1st round: swap to prepare 2nd round; 2nd round: put it back */ FFSWAP(AVFilterChannelLayouts *, a, b); }
/**
 * For every input link of filter whose out_<list> holds exactly one entry,
 * try to narrow the in_<list> of each output link of the same media type
 * down to that single entry.
 *
 * - fmt_type:   type of one list element
 * - list_type:  type of the list struct
 * - list:       suffix pasted onto in_ / out_ to form the link field name
 * - var:        name of the array member, nb: name of its element count
 * - add_format: not used by this version of the macro
 *
 * An output list is narrowed only if it contains the input's entry; output
 * lists that already hold one entry are skipped. Sets ret to 1 whenever a
 * list was narrowed. Relies on i, j, k, ret and filter being declared by
 * the enclosing function.
 */
#define REDUCE_FORMATS(fmt_type, list_type, list, var, nb, add_format) \
do { \
    for (i = 0; i < filter->nb_inputs; i++) { \
        AVFilterLink *link = filter->inputs[i]; \
        fmt_type fmt; \
        \
        if (!link->out_ ## list || link->out_ ## list->nb != 1) \
            continue; \
        fmt = link->out_ ## list->var[0]; \
        \
        for (j = 0; j < filter->nb_outputs; j++) { \
            AVFilterLink *out_link = filter->outputs[j]; \
            list_type *fmts; \
            \
            if (link->type != out_link->type || \
                out_link->in_ ## list->nb == 1) \
                continue; \
            fmts = out_link->in_ ## list; \
            \
            for (k = 0; k < out_link->in_ ## list->nb; k++) \
                if (fmts->var[k] == fmt) { \
                    fmts->var[0] = fmt; \
                    fmts->nb = 1; \
                    ret = 1; \
                    break; \
                } \
        } \
    } \
} while (0)
Useful e.g. if incompatible sample formats but compatible sample rates.
if (link->in_formats != link->out_formats && link->in_formats && link->out_formats) if (!can_merge_formats(link->in_formats, link->out_formats, link->type, 0)) convert_needed = 1; if (link->type == AVMEDIA_TYPE_AUDIO) { if (link->in_samplerates != link->out_samplerates && link->in_samplerates && link->out_samplerates) if (!can_merge_formats(link->in_samplerates, link->out_samplerates, 0, 1)) convert_needed = 1; }
can_merge_formats() makes a copy of the list without refs.
Example: { yuv420p, gray8 } ∩ { rgb24, gray8 } = { gray8 }.
Better convert yuv420p → rgb24 than select gray8 and lose color.
/* Do not lose chroma or alpha in merging. It happens if both lists have formats with chroma (resp. alpha), but the only formats in common do not have it (e.g. YUV+gray vs. RGB+gray): in that case, the merging would select the gray format, possibly causing a lossy conversion elsewhere in the graph. To avoid that, pretend that there are no common formats to force the insertion of a conversion filter. */ if (type == AVMEDIA_TYPE_VIDEO) for (i = 0; i < a->nb_formats; i++) for (j = 0; j < b->nb_formats; j++) { const AVPixFmtDescriptor *adesc = av_pix_fmt_desc_get(a->formats[i]); const AVPixFmtDescriptor *bdesc = av_pix_fmt_desc_get(b->formats[j]); alpha2 |= adesc->flags & bdesc->flags & AV_PIX_FMT_FLAG_ALPHA; chroma2|= adesc->nb_components > 1 && bdesc->nb_components > 1; if (a->formats[i] == b->formats[j]) { alpha1 |= adesc->flags & AV_PIX_FMT_FLAG_ALPHA; chroma1|= adesc->nb_components > 1; } } // If chroma or alpha can be lost through merging then do not merge if (alpha2 > alpha1 || chroma2 > chroma1) return NULL;
while ((ret = query_formats(graph, log_ctx)) == AVERROR(EAGAIN)) av_log(graph, AV_LOG_DEBUG, "query_formats not finished\n");
if (count_queried || count_merged) return AVERROR(EAGAIN); av_log(graph, AV_LOG_ERROR, "The following filters could not choose their formats: %s\n" "Consider inserting the (a)format filter near their input or " "output.\n", bp.str);
Example: amerge: out layout = union of in layouts
if (!ctx->inputs[i]->in_channel_layouts || !ctx->inputs[i]->in_channel_layouts->nb_channel_layouts) { av_log(ctx, AV_LOG_WARNING, "No channel layout for input %d\n", i + 1); return AVERROR(EAGAIN); } inlayout[i] = ctx->inputs[i]->in_channel_layouts->channel_layouts[0];
Problem: fragile, does not work when many filters need it.
More robust system for complex constraints
Partial graph reconfiguration