From: Derek Buitenhuis Date: Mon, 18 Apr 2016 14:33:38 +0000 (+0100) Subject: Merge commit '8a02a8031ef4f98faf5647f0e01a8922247bf748' X-Git-Tag: android-x86-7.1-r1~5544 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=94e5f0922b72f0a40ed328b70572ed868571cb96;p=android-x86%2Fexternal-ffmpeg.git Merge commit '8a02a8031ef4f98faf5647f0e01a8922247bf748' * commit '8a02a8031ef4f98faf5647f0e01a8922247bf748': lavfi: add an NVIDIA NPP-based scaling filter Merged-by: Derek Buitenhuis --- 94e5f0922b72f0a40ed328b70572ed868571cb96 diff --cc Changelog index 827e3e255c,692c865f52..58ac1d7b0c --- a/Changelog +++ b/Changelog @@@ -2,197 -2,17 +2,198 @@@ Entries are sorted chronologically fro releases are sorted from youngest to oldest. version : -- aliases and defaults for Ogg subtypes (opus, spx) -- HEVC/H.265 RTP payload format (draft v6) packetizer and depacketizer -- avplay now exits by default at the end of playback -- XCB-based screen-grabber -- creating DASH compatible fragmented MP4, MPEG-DASH segmenting muxer -- H.261 RTP payload format (RFC 4587) depacketizer and experimental packetizer +- DXVA2-accelerated HEVC Main10 decoding +- fieldhint filter +- loop video filter and aloop audio filter +- Bob Weaver deinterlacing filter +- firequalizer filter +- datascope filter +- bench and abench filters +- ciescope filter +- protocol blacklisting API +- MediaCodec H264 decoding +- VC-2 HQ RTP payload format (draft v1) depacketizer +- AudioToolbox audio decoders +- AudioToolbox audio encoders +- coreimage filter (GPU based image filtering on OSX) +- libdcadec removed +- bitstream filter for extracting DTS core +- ADPCM IMA DAT4 decoder +- musx demuxer +- aix demuxer +- remap filter +- hash and framehash muxers +- colorspace filter +- hdcd filter +- readvitc filter +- VAAPI-accelerated format conversion and scaling ++- libnpp/CUDA-accelerated format conversion and scaling + +version 3.0: +- Common Encryption (CENC) MP4 encoding and decoding support +- DXV decoding +- 
extrastereo filter +- ocr filter +- alimiter filter +- stereowiden filter +- stereotools filter +- rubberband filter +- tremolo filter +- agate filter +- chromakey filter +- maskedmerge filter +- Screenpresso SPV1 decoding +- chromaprint fingerprinting muxer +- ffplay dynamic volume control +- displace filter +- selectivecolor filter +- extensive native AAC encoder improvements and removal of experimental flag +- ADPCM PSX decoder +- 3dostr, dcstr, fsb, genh, vag, xvag, ads, msf, svag & vpk demuxer +- zscale filter +- wve demuxer +- zero-copy Intel QSV transcoding in ffmpeg +- shuffleframes filter +- SDX2 DPCM decoder +- vibrato filter +- innoHeim/Rsupport Screen Capture Codec decoder +- ADPCM AICA decoder +- Interplay ACM demuxer and audio decoder +- XMA1 & XMA2 decoder +- realtime filter +- anoisesrc audio filter source +- IVR demuxer +- compensationdelay filter +- acompressor filter +- support encoding 16-bit RLE SGI images +- apulsator filter +- sidechaingate audio filter +- mipsdspr1 option has been renamed to mipsdsp +- aemphasis filter +- mips32r5 option has been removed +- mips64r6 option has been removed +- DXVA2-accelerated VP9 decoding +- SOFAlizer: virtual binaural acoustics filter +- VAAPI VP9 hwaccel +- audio high-order multiband parametric equalizer +- automatic bitstream filtering +- showspectrumpic filter +- libstagefright support removed +- spectrumsynth filter +- ahistogram filter +- only seek with the right mouse button in ffplay +- toggle full screen when double-clicking with the left mouse button in ffplay +- afftfilt filter +- convolution filter +- libquvi support removed +- support for dvaudio in wav and avi +- libaacplus and libvo-aacenc support removed +- Cineform HD decoder +- new DCA decoder with full support for DTS-HD extensions +- significant performance improvements in Windows Television (WTV) demuxer +- nnedi deinterlacer +- streamselect video and astreamselect audio filter +- swaprect filter +- metadata video and ametadata audio 
filter +- SMPTE VC-2 HQ profile support for the Dirac decoder +- SMPTE VC-2 native encoder supporting the HQ profile + + +version 2.8: +- colorkey video filter +- BFSTM/BCSTM demuxer +- little-endian ADPCM_THP decoder +- Hap decoder and encoder +- DirectDraw Surface image/texture decoder +- ssim filter +- optional new ASF demuxer +- showvolume filter +- Many improvements to the JPEG 2000 decoder +- Go2Meeting decoding support +- adrawgraph audio and drawgraph video filter +- removegrain video filter +- Intel QSV-accelerated MPEG-2 video and HEVC encoding +- Intel QSV-accelerated MPEG-2 video and HEVC decoding +- Intel QSV-accelerated VC-1 video decoding +- libkvazaar HEVC encoder +- erosion, dilation, deflate and inflate video filters +- Dynamic Audio Normalizer as dynaudnorm filter +- Reverse video and areverse audio filter +- Random filter +- deband filter +- AAC fixed-point decoding +- sidechaincompress audio filter +- bitstream filter for converting HEVC from MP4 to Annex B +- acrossfade audio filter +- allyuv and allrgb video sources +- atadenoise video filter +- OS X VideoToolbox support +- aphasemeter filter +- showfreqs filter +- vectorscope filter +- waveform filter +- hstack and vstack filter +- Support DNx100 (1440x1080@8) +- VAAPI hevc hwaccel +- VDPAU hevc hwaccel +- framerate filter +- Switched default encoders for webm to VP9 and Opus +- Removed experimental flag from the JPEG 2000 encoder + + +version 2.7: +- FFT video filter +- TDSC decoder +- DTS lossless extension (XLL) decoding (not lossless, disabled by default) +- showwavespic filter +- DTS decoding through libdcadec +- Drop support for nvenc API before 5.0 +- nvenc HEVC encoder +- Detelecine filter +- Intel QSV-accelerated H.264 encoding +- MMAL-accelerated H.264 decoding +- basic APNG encoder and muxer with default extension "apng" +- unpack DivX-style packed B-frames in MPEG-4 bitstream filter +- WebM Live Chunk Muxer +- nvenc level and tier options +- chorus filter +- Canopus HQ/HQA 
decoder +- Automatically rotate videos based on metadata in ffmpeg +- improved Quickdraw compatibility +- VP9 high bit-depth and extended colorspaces decoding support +- WebPAnimEncoder API when available for encoding and muxing WebP +- Direct3D11-accelerated decoding +- Support Secure Transport +- Multipart JPEG demuxer + + +version 2.6: +- nvenc encoder +- 10bit spp filter +- colorlevels filter +- RIFX format for *.wav files - RTP/mpegts muxer -- VP8 in Ogg demuxing +- non continuous cache protocol support +- tblend filter +- cropdetect support for non 8bpp, absolute (if limit >= 1) and relative (if limit < 1.0) threshold +- Camellia symmetric block cipher - OpenH264 encoder wrapper +- VOC seeking support +- Closed caption Decoder +- fspp, uspp, pp7 MPlayer postprocessing filters ported to native filters +- showpalette filter +- Twofish symmetric block cipher - Support DNx100 (960x720@8) -- Direct3D11-accelerated decoding +- eq2 filter ported from libmpcodecs as eq filter +- removed libmpcodecs +- Changed default DNxHD colour range in QuickTime .mov derivatives to mpeg range +- ported softpulldown filter from libmpcodecs as repeatfields filter +- dcshift filter +- RTP depacketizer for loss tolerant payload format for MP3 audio (RFC 5219) +- RTP depacketizer for AC3 payload format (RFC 4184) +- palettegen and paletteuse filters +- VP9 RTP payload format (draft 0) experimental depacketizer +- RTP depacketizer for DV (RFC 6469) - DXVA2-accelerated HEVC decoding - AAC ELD 480 decoding - Intel QSV-accelerated H.264 decoding diff --cc configure index cb3d304f49,a68a5d7354..97f374bc1b --- a/configure +++ b/configure @@@ -230,10 -194,8 +230,11 @@@ External library support --enable-libilbc enable iLBC de/encoding via libilbc [no] --enable-libkvazaar enable HEVC encoding via libkvazaar [no] --enable-libmfx enable HW acceleration through libmfx + --enable-libmodplug enable ModPlug via libmodplug [no] --enable-libmp3lame enable MP3 encoding via libmp3lame [no] + 
--enable-libnut enable NUT (de)muxing via libnut, + native (de)muxer exists [no] + --enable-libnpp enable NVIDIA Performance Primitives-based code [no] --enable-libopencore-amrnb enable AMR-NB de/encoding via libopencore-amrnb [no] --enable-libopencore-amrwb enable AMR-WB decoding via libopencore-amrwb [no] --enable-libopencv enable video filtering via libopencv [no] @@@ -1479,9 -1222,8 +1480,10 @@@ EXTERNAL_LIBRARY_LIST= libilbc libkvazaar libmfx + libmodplug libmp3lame + libnut + libnpp libopencore_amrnb libopencore_amrwb libopencv @@@ -2948,60 -2358,10 +2950,61 @@@ histeq_filter_deps="gpl hqdn3d_filter_deps="gpl" hwupload_cuda_filter_deps="cuda" interlace_filter_deps="gpl" +kerndeint_filter_deps="gpl" +ladspa_filter_deps="ladspa dlopen" +mcdeint_filter_deps="avcodec gpl" +movie_filter_deps="avcodec avformat" +mpdecimate_filter_deps="gpl" +mpdecimate_filter_select="pixelutils" +mptestsrc_filter_deps="gpl" +negate_filter_deps="lut_filter" +nnedi_filter_deps="gpl" +ocr_filter_deps="libtesseract" ocv_filter_deps="libopencv" +owdenoise_filter_deps="gpl" +pan_filter_deps="swresample" +perspective_filter_deps="gpl" +phase_filter_deps="gpl" +pp7_filter_deps="gpl" +pp_filter_deps="gpl postproc" +pullup_filter_deps="gpl" +removelogo_filter_deps="avcodec avformat swscale" +repeatfields_filter_deps="gpl" resample_filter_deps="avresample" +rubberband_filter_deps="librubberband" +sab_filter_deps="gpl swscale" +scale2ref_filter_deps="swscale" scale_filter_deps="swscale" +select_filter_select="pixelutils" +showcqt_filter_deps="avcodec avformat swscale" +showcqt_filter_select="fft" +showfreqs_filter_deps="avcodec" +showfreqs_filter_select="fft" +showspectrum_filter_deps="avcodec" +showspectrum_filter_select="fft" +showspectrumpic_filter_deps="avcodec" +showspectrumpic_filter_select="fft" +smartblur_filter_deps="gpl swscale" +sofalizer_filter_deps="netcdf avcodec" +sofalizer_filter_select="fft" +spectrumsynth_filter_deps="avcodec" +spectrumsynth_filter_select="fft" 
+spp_filter_deps="gpl avcodec" +spp_filter_select="fft idctdsp fdctdsp me_cmp pixblockdsp" +stereo3d_filter_deps="gpl" +subtitles_filter_deps="avformat avcodec libass" +super2xsai_filter_deps="gpl" +pixfmts_super2xsai_test_deps="super2xsai_filter" +tinterlace_filter_deps="gpl" +tinterlace_merge_test_deps="tinterlace_filter" +tinterlace_pad_test_deps="tinterlace_filter" +uspp_filter_deps="gpl avcodec" +vidstabdetect_filter_deps="libvidstab" +vidstabtransform_filter_deps="libvidstab" +zmq_filter_deps="libzmq" +zoompan_filter_deps="swscale" +zscale_filter_deps="libzimg" + scale_npp_filter_deps="cuda libnpp" scale_vaapi_filter_deps="vaapi VAProcPipelineParameterBuffer" # examples @@@ -4959,14 -4026,14 +4962,15 @@@ die_license_disabled gpl x11gra die_license_disabled nonfree cuda die_license_disabled nonfree libfaac -die_license_disabled nonfree libfdk_aac + die_license_disabled nonfree libnpp die_license_disabled nonfree nvenc -die_license_disabled nonfree openssl +enabled gpl && die_license_disabled_gpl nonfree libfdk_aac +enabled gpl && die_license_disabled_gpl nonfree openssl +die_license_disabled version3 gmp die_license_disabled version3 libopencore_amrnb die_license_disabled version3 libopencore_amrwb -die_license_disabled version3 libvo_aacenc +die_license_disabled version3 libsmbclient die_license_disabled version3 libvo_amrwbenc enabled version3 && { enabled gpl && enable gplv3 || enable lgplv3; } @@@ -5564,64 -4523,41 +5568,65 @@@ enabled libgsm && { for gsm_ enabled libilbc && require libilbc ilbc.h WebRtcIlbcfix_InitDecode -lilbc enabled libkvazaar && require_pkg_config "kvazaar >= 0.8.1" kvazaar.h kvz_api_get enabled libmfx && require_pkg_config libmfx "mfx/mfxvideo.h" MFXInit +enabled libmodplug && require_pkg_config libmodplug libmodplug/modplug.h ModPlug_Load enabled libmp3lame && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_quality -lmp3lame +enabled libnut && require libnut libnut.h nut_demuxer_init -lnut + enabled libnpp && require libnpp 
npp.h nppGetLibVersion -lnppi -lnppc enabled libopencore_amrnb && require libopencore_amrnb opencore-amrnb/interf_dec.h Decoder_Interface_init -lopencore-amrnb enabled libopencore_amrwb && require libopencore_amrwb opencore-amrwb/dec_if.h D_IF_init -lopencore-amrwb -enabled libopencv && require_pkg_config opencv opencv/cv.h cvCreateImageHeader +enabled libopencv && { check_header opencv2/core/core_c.h && + { use_pkg_config opencv opencv2/core/core_c.h cvCreateImageHeader || + require opencv opencv2/core/core_c.h cvCreateImageHeader -lopencv_core -lopencv_imgproc; } || + require_pkg_config opencv opencv/cxcore.h cvCreateImageHeader; } enabled libopenh264 && require_pkg_config openh264 wels/codec_api.h WelsGetCodecVersion -enabled libopenjpeg && { { check_header openjpeg.h && check_lib2 openjpeg.h opj_version -lopenjpeg -DOPJ_STATIC; } || - { require_pkg_config libopenjpeg1 openjpeg.h opj_version -DOPJ_STATIC; } } +enabled libopenjpeg && { check_lib openjpeg-2.1/openjpeg.h opj_version -lopenjp2 -DOPJ_STATIC || + check_lib openjpeg-2.0/openjpeg.h opj_version -lopenjp2 -DOPJ_STATIC || + check_lib openjpeg-1.5/openjpeg.h opj_version -lopenjpeg -DOPJ_STATIC || + check_lib openjpeg.h opj_version -lopenjpeg -DOPJ_STATIC || + die "ERROR: libopenjpeg not found"; } enabled libopus && require_pkg_config opus opus_multistream.h opus_multistream_decoder_create -enabled libpulse && require_pkg_config libpulse-simple pulse/simple.h pa_simple_new +enabled libpulse && require_pkg_config libpulse pulse/pulseaudio.h pa_context_new enabled librtmp && require_pkg_config librtmp librtmp/rtmp.h RTMP_Socket +enabled librubberband && require_pkg_config "rubberband >= 1.8.1" rubberband/rubberband-c.h rubberband_new enabled libschroedinger && require_pkg_config schroedinger-1.0 schroedinger/schro.h schro_init +enabled libshine && require_pkg_config shine shine/layer3.h shine_encode_buffer +enabled libsmbclient && { use_pkg_config smbclient libsmbclient.h smbc_init || + require smbclient 
libsmbclient.h smbc_init -lsmbclient; } enabled libsnappy && require snappy snappy-c.h snappy_compress -lsnappy +enabled libsoxr && require libsoxr soxr.h soxr_create -lsoxr && LIBSOXR="-lsoxr" +enabled libssh && require_pkg_config libssh libssh/sftp.h sftp_init enabled libspeex && require_pkg_config speex speex/speex.h speex_decoder_init -lspeex +enabled libtesseract && require_pkg_config tesseract tesseract/capi.h TessBaseAPICreate enabled libtheora && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg -enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame -enabled libvo_aacenc && require libvo_aacenc vo-aacenc/voAAC.h voGetAACEncAPI -lvo-aacenc +enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame && + { check_lib twolame.h twolame_encode_buffer_float32_interleaved -ltwolame || + die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } +enabled libutvideo && require_cpp utvideo "stdint.h stdlib.h utvideo/utvideo.h utvideo/Codec.h" 'CCodec*' -lutvideo -lstdc++ +enabled libv4l2 && require_pkg_config libv4l2 libv4l2.h v4l2_ioctl +enabled libvidstab && require_pkg_config "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc enabled libvorbis && require libvorbis vorbis/vorbisenc.h vorbis_info_init -lvorbisenc -lvorbis -logg -enabled libvpx && require_pkg_config "vpx >= 1.3.0" vpx/vpx_codec.h vpx_codec_version && { + +enabled libvpx && { enabled libvpx_vp8_decoder && { - check_pkg_config vpx "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp8_dx || - disable libvpx_vp8_decoder; + use_pkg_config "vpx >= 0.9.1" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp8_dx || + check_lib2 "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_dec_init_ver -lvpx || + die "ERROR: libvpx decoder version must be >=0.9.1"; } enabled libvpx_vp8_encoder && { - check_pkg_config vpx "vpx/vpx_encoder.h vpx/vp8cx.h" 
vpx_codec_vp8_cx || - disable libvpx_vp8_encoder; + use_pkg_config "vpx >= 0.9.7" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp8_cx || + check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_enc_init_ver VP8E_SET_MAX_INTRA_BITRATE_PCT" -lvpx || + die "ERROR: libvpx encoder version must be >=0.9.7"; } enabled libvpx_vp9_decoder && { - check_pkg_config vpx "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp9_dx || - disable libvpx_vp9_decoder; + use_pkg_config "vpx >= 1.3.0" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp9_dx || + check_lib2 "vpx/vpx_decoder.h vpx/vp8dx.h" "vpx_codec_vp9_dx" -lvpx || + disable libvpx_vp9_decoder; } enabled libvpx_vp9_encoder && { - check_pkg_config vpx "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp9_cx || - disable libvpx_vp9_encoder; + use_pkg_config "vpx >= 1.3.0" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp9_cx || + check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_vp9_cx VP9E_SET_AQ_MODE" -lvpx || + disable libvpx_vp9_encoder; } if disabled_all libvpx_vp8_decoder libvpx_vp9_decoder libvpx_vp8_encoder libvpx_vp9_encoder; then die "libvpx enabled but no supported decoders found" diff --cc doc/filters.texi index 7634b25532,23e4286da8..8fca52d7ad --- a/doc/filters.texi +++ b/doc/filters.texi @@@ -2637,954 -1724,583 +2637,995 @@@ Only used if plugin have zero inputs @end table -All expressions default to "val". 
+@subsection Examples -Some examples: +@itemize +@item +List all available plugins within amp (LADSPA example plugin) library: @example -# Negate input video -lutrgb="r=maxval+minval-val:g=maxval+minval-val:b=maxval+minval-val" -lutyuv="y=maxval+minval-val:u=maxval+minval-val:v=maxval+minval-val" +ladspa=file=amp +@end example -# The above is the same as -lutrgb="r=negval:g=negval:b=negval" -lutyuv="y=negval:u=negval:v=negval" +@item +List all available controls and their valid ranges for @code{vcf_notch} +plugin from @code{VCF} library: +@example +ladspa=f=vcf:p=vcf_notch:c=help +@end example -# Negate luminance -lutyuv=negval +@item +Simulate low quality audio equipment using @code{Computer Music Toolkit} (CMT) +plugin library: +@example +ladspa=file=cmt:plugin=lofi:controls=c0=22|c1=12|c2=12 +@end example -# Remove chroma components, turning the video into a graytone image -lutyuv="u=128:v=128" +@item +Add reverberation to the audio using TAP-plugins +(Tom's Audio Processing plugins): +@example +ladspa=file=tap_reverb:tap_reverb +@end example -# Apply a luma burning effect -lutyuv="y=2*val" +@item +Generate white noise, with 0.2 amplitude: +@example +ladspa=file=cmt:noise_source_white:c=c0=.2 +@end example -# Remove green and blue components -lutrgb="g=0:b=0" +@item +Generate 20 bpm clicks using plugin @code{C* Click - Metronome} from the +@code{C* Audio Plugin Suite} (CAPS) library: +@example +ladspa=file=caps:Click:c=c1=20 +@end example -# Set a constant alpha channel value on input -format=rgba,lutrgb=a="maxval-minval/2" +@item +Apply @code{C* Eq10X2 - Stereo 10-band equaliser} effect: +@example +ladspa=caps:Eq10X2:c=c0=-48|c9=-24|c3=12|c4=2 +@end example -# Correct luminance gamma by a factor of 0.5 -lutyuv=y=gammaval(0.5) +@item +Increase volume by 20dB using fast lookahead limiter from Steve Harris +@code{SWH Plugins} collection: +@example +ladspa=fast_lookahead_limiter_1913:fastLookaheadLimiter:20|0|2 @end example -@section negate +@item +Attenuate low 
frequencies using Multiband EQ from Steve Harris +@code{SWH Plugins} collection: +@example +ladspa=mbeq_1197:mbeq:-24|-24|-24|0|0|0|0|0|0|0|0|0|0|0|0 +@end example +@end itemize -Negate input video. +@subsection Commands -It accepts an integer in input; if non-zero it negates the -alpha component (if available). The default value in input is 0. +This filter supports the following commands: +@table @option +@item cN +Modify the @var{N}-th control value. -@section noformat +If the specified value is not valid, it is ignored and the prior one is kept. +@end table -Force libavfilter not to use any of the specified pixel formats for the -input to the next filter. +@section lowpass + +Apply a low-pass filter with 3dB point frequency. +The filter can be either single-pole or double-pole (the default). +The filter rolls off at 6dB per pole per octave (20dB per pole per decade). + +The filter accepts the following options: -It accepts the following parameters: @table @option +@item frequency, f +Set frequency in Hz. Default is 500. -@item pix_fmts -A '|'-separated list of pixel format names, such as -apix_fmts=yuv420p|monow|rgb24". +@item poles, p +Set number of poles. Default is 2. +@item width_type +Set method to specify band-width of filter. +@table @option +@item h +Hz +@item q +Q-Factor +@item o +octave +@item s +slope @end table -Some examples: -@example -# Force libavfilter to use a format different from "yuv420p" for the -# input to the vflip filter -noformat=pix_fmts=yuv420p,vflip +@item width, w +Specify the band-width of a filter in width_type units. +Applies only to double-pole filter. +The default is 0.707q and gives a Butterworth response. +@end table -# Convert the input video to any of the formats not contained in the list -noformat=yuv420p|yuv444p|yuv410p -@end example +@anchor{pan} +@section pan -@section null +Mix channels with specific gain levels. The filter accepts the output +channel layout followed by a set of channel definitions. 
-Pass the video source unchanged to the output. +This filter is also designed to efficiently remap the channels of an audio +stream. -@section ocv +The filter accepts parameters of the form: +"@var{l}|@var{outdef}|@var{outdef}|..." -Apply a video transform using libopencv. +@table @option +@item l +output channel layout or number of channels -To enable this filter, install the libopencv library and headers and -configure Libav with --enable-libopencv. +@item outdef +output channel specification, of the form: +"@var{out_name}=[@var{gain}*]@var{in_name}[+[@var{gain}*]@var{in_name}...]" -It accepts the following parameters: +@item out_name +output channel to define, either a channel name (FL, FR, etc.) or a channel +number (c0, c1, etc.) -@table @option - -@item filter_name -The name of the libopencv filter to apply. - -@item filter_params -The parameters to pass to the libopencv filter. If not specified, the default -values are assumed. +@item gain +multiplicative coefficient for the channel, 1 leaving the volume unchanged +@item in_name +input channel to use, see out_name for details; it is not possible to mix +named and numbered input channels @end table -Refer to the official libopencv documentation for more precise -information: -@url{http://opencv.willowgarage.com/documentation/c/image_filtering.html} - -Several libopencv filters are supported; see the following subsections. +If the `=' in a channel specification is replaced by `<', then the gains for +that specification will be renormalized so that the total is 1, thus +avoiding clipping noise. -@anchor{dilate} -@subsection dilate +@subsection Mixing examples -Dilate an image by using a specific structuring element. -It corresponds to the libopencv function @code{cvDilate}. +For example, if you want to down-mix from stereo to mono, but with a bigger +factor for the left channel: +@example +pan=1c|c0=0.9*c0+0.1*c1 +@end example -It accepts the parameters: @var{struct_el}|@var{nb_iterations}. 
+A customized down-mix to stereo that works automatically for 3-, 4-, 5- and +7-channels surround: +@example +pan=stereo| FL < FL + 0.5*FC + 0.6*BL + 0.6*SL | FR < FR + 0.5*FC + 0.6*BR + 0.6*SR +@end example -@var{struct_el} represents a structuring element, and has the syntax: -@var{cols}x@var{rows}+@var{anchor_x}x@var{anchor_y}/@var{shape} +Note that @command{ffmpeg} integrates a default down-mix (and up-mix) system +that should be preferred (see "-ac" option) unless you have very specific +needs. -@var{cols} and @var{rows} represent the number of columns and rows of -the structuring element, @var{anchor_x} and @var{anchor_y} the anchor -point, and @var{shape} the shape for the structuring element. @var{shape} -must be "rect", "cross", "ellipse", or "custom". +@subsection Remapping examples -If the value for @var{shape} is "custom", it must be followed by a -string of the form "=@var{filename}". The file with name -@var{filename} is assumed to represent a binary image, with each -printable character corresponding to a bright pixel. When a custom -@var{shape} is used, @var{cols} and @var{rows} are ignored, the number -or columns and rows of the read file are assumed instead. +The channel remapping will be effective if, and only if: -The default value for @var{struct_el} is "3x3+0x0/rect". +@itemize +@item gain coefficients are zeroes or ones, +@item only one input per channel output, +@end itemize -@var{nb_iterations} specifies the number of times the transform is -applied to the image, and defaults to 1. +If all these conditions are satisfied, the filter will notify the user ("Pure +channel mapping detected"), and use an optimized and lossless method to do the +remapping. 
-Some examples: +For example, if you have a 5.1 source and want a stereo audio stream by +dropping the extra channels: @example -# Use the default values -ocv=dilate - -# Dilate using a structuring element with a 5x5 cross, iterating two times -ocv=filter_name=dilate:filter_params=5x5+2x2/cross|2 - -# Read the shape from the file diamond.shape, iterating two times. -# The file diamond.shape may contain a pattern of characters like this -# * -# *** -# ***** -# *** -# * -# The specified columns and rows are ignored -# but the anchor point coordinates are not -ocv=dilate:0x0+2x2/custom=diamond.shape|2 +pan="stereo| c0=FL | c1=FR" @end example -@subsection erode - -Erode an image by using a specific structuring element. -It corresponds to the libopencv function @code{cvErode}. - -It accepts the parameters: @var{struct_el}:@var{nb_iterations}, -with the same syntax and semantics as the @ref{dilate} filter. +Given the same source, you can also switch front left and front right channels +and keep the input channel layout: +@example +pan="5.1| c0=c1 | c1=c0 | c2=c2 | c3=c3 | c4=c4 | c5=c5" +@end example -@subsection smooth +If the input is a stereo audio stream, you can mute the front left channel (and +still keep the stereo channel layout) with: +@example +pan="stereo|c1=c1" +@end example -Smooth the input video. +Still with a stereo audio stream input, you can copy the right channel in both +front left and right: +@example +pan="stereo| c0=FR | c1=FR" +@end example -The filter takes the following parameters: -@var{type}|@var{param1}|@var{param2}|@var{param3}|@var{param4}. +@section replaygain -@var{type} is the type of smooth filter to apply, and must be one of -the following values: "blur", "blur_no_scale", "median", "gaussian", -or "bilateral". The default value is "gaussian". +ReplayGain scanner filter. This filter takes an audio stream as an input and +outputs it unchanged. +At end of filtering it displays @code{track_gain} and @code{track_peak}. 
-The meaning of @var{param1}, @var{param2}, @var{param3}, and @var{param4} -depend on the smooth type. @var{param1} and -@var{param2} accept integer positive values or 0. @var{param3} and -@var{param4} accept floating point values. +@section resample -The default value for @var{param1} is 3. The default value for the -other parameters is 0. +Convert the audio sample format, sample rate and channel layout. It is +not meant to be used directly. -These parameters correspond to the parameters assigned to the -libopencv function @code{cvSmooth}. +@section rubberband +Apply time-stretching and pitch-shifting with librubberband. -@anchor{overlay} -@section overlay +The filter accepts the following options: -Overlay one video on top of another. +@table @option +@item tempo +Set tempo scale factor. -It takes two inputs and has one output. The first input is the "main" -video on which the second input is overlayed. +@item pitch +Set pitch scale factor. -It accepts the following parameters: +@item transients +Set transients detector. +Possible values are: +@table @var +@item crisp +@item mixed +@item smooth +@end table -@table @option +@item detector +Set detector. +Possible values are: +@table @var +@item compound +@item percussive +@item soft +@end table -@item x -The horizontal position of the left edge of the overlaid video on the main video. +@item phase +Set phase. +Possible values are: +@table @var +@item laminar +@item independent +@end table -@item y -The vertical position of the top edge of the overlaid video on the main video. +@item window +Set processing window size. +Possible values are: +@table @var +@item standard +@item short +@item long +@end table +@item smoothing +Set smoothing. +Possible values are: +@table @var +@item off +@item on @end table -The parameters are expressions containing the following parameters: +@item formant +Enable formant preservation when shift pitching. 
+Possible values are: +@table @var +@item shifted +@item preserved +@end table -@table @option -@item main_w, main_h -The main input width and height. +@item pitchq +Set pitch quality. +Possible values are: +@table @var +@item quality +@item speed +@item consistency +@end table -@item W, H -These are the same as @var{main_w} and @var{main_h}. +@item channels +Set channels. +Possible values are: +@table @var +@item apart +@item together +@end table +@end table -@item overlay_w, overlay_h -The overlay input width and height. +@section sidechaincompress -@item w, h -These are the same as @var{overlay_w} and @var{overlay_h}. +This filter acts like normal compressor but has the ability to compress +detected signal using second input signal. +It needs two input streams and returns one output stream. +First input stream will be processed depending on second stream signal. +The filtered signal then can be filtered with other filters in later stages of +processing. See @ref{pan} and @ref{amerge} filter. -@item eof_action -The action to take when EOF is encountered on the secondary input; it accepts -one of the following values: +The filter accepts the following options: @table @option -@item repeat -Repeat the last frame (the default). -@item endall -End both streams. -@item pass -Pass the main input through. -@end table +@item level_in +Set input gain. Default is 1. Range is between 0.015625 and 64. +@item threshold +If a signal of second stream raises above this level it will affect the gain +reduction of first stream. +By default is 0.125. Range is between 0.00097563 and 1. + +@item ratio +Set a ratio about which the signal is reduced. 1:2 means that if the level +raised 4dB above the threshold, it will be only 2dB above after the reduction. +Default is 2. Range is between 1 and 20. + +@item attack +Amount of milliseconds the signal has to rise above the threshold before gain +reduction starts. Default is 20. Range is between 0.01 and 2000. 
+ +@item release +Amount of milliseconds the signal has to fall below the threshold before +reduction is decreased again. Default is 250. Range is between 0.01 and 9000. + +@item makeup +Set the amount by how much signal will be amplified after processing. +Default is 2. Range is from 1 to 64. + +@item knee +Curve the sharp knee around the threshold to enter gain reduction more softly. +Default is 2.82843. Range is between 1 and 8. + +@item link +Choose if the @code{average} level between all channels of side-chain stream +or the louder (@code{maximum}) channel of side-chain stream affects the +reduction. Default is @code{average}. + +@item detection +Should the exact signal be taken in case of @code{peak} or an RMS one in case +of @code{rms}. Default is @code{rms} which is mainly smoother. + +@item level_sc +Set sidechain gain. Default is 1. Range is between 0.015625 and 64. + +@item mix +How much to use compressed signal in output. Default is 1. +Range is between 0 and 1. @end table -Be aware that frames are taken from each input video in timestamp -order, hence, if their initial timestamps differ, it is a a good idea -to pass the two inputs through a @var{setpts=PTS-STARTPTS} filter to -have them begin in the same zero timestamp, as the example for -the @var{movie} filter does. 
+@subsection Examples -Some examples: +@itemize +@item +Full ffmpeg example taking 2 audio inputs, 1st input to be compressed +depending on the signal of 2nd input and later compressed signal to be +merged with 2nd input: @example -# Draw the overlay at 10 pixels from the bottom right -# corner of the main video -overlay=x=main_w-overlay_w-10:y=main_h-overlay_h-10 +ffmpeg -i main.flac -i sidechain.flac -filter_complex "[1:a]asplit=2[sc][mix];[0:a][sc]sidechaincompress[compr];[compr][mix]amerge" +@end example +@end itemize -# Insert a transparent PNG logo in the bottom left corner of the input -avconv -i input -i logo -filter_complex 'overlay=x=10:y=main_h-overlay_h-10' output +@section sidechaingate -# Insert 2 different transparent PNG logos (second logo on bottom -# right corner) -avconv -i input -i logo1 -i logo2 -filter_complex -'overlay=x=10:y=H-h-10,overlay=x=W-w-10:y=H-h-10' output +A sidechain gate acts like a normal (wideband) gate but has the ability to +filter the detected signal before sending it to the gain reduction stage. +Normally a gate uses the full range signal to detect a level above the +threshold. +For example: If you cut all lower frequencies from your sidechain signal +the gate will decrease the volume of your track only if not enough highs +appear. With this technique you are able to reduce the resonation of a +natural drum or remove "rumbling" of muted strokes from a heavily distorted +guitar. +It needs two input streams and returns one output stream. +First input stream will be processed depending on second stream signal. 
-# Add a transparent color layer on top of the main video; -# WxH specifies the size of the main input to the overlay filter -color=red@.3:WxH [over]; [in][over] overlay [out] +The filter accepts the following options: -# Mask 10-20 seconds of a video by applying the delogo filter to a section -avconv -i test.avi -codec:v:0 wmv2 -ar 11025 -b:v 9000k --vf '[in]split[split_main][split_delogo];[split_delogo]trim=start=360:end=371,delogo=0:0:640:480[delogoed];[split_main][delogoed]overlay=eof_action=pass[out]' -masked.avi -@end example +@table @option +@item level_in +Set input level before filtering. +Default is 1. Allowed range is from 0.015625 to 64. -You can chain together more overlays but the efficiency of such -approach is yet to be tested. +@item range +Set the level of gain reduction when the signal is below the threshold. +Default is 0.06125. Allowed range is from 0 to 1. -@section pad +@item threshold +If a signal rises above this level the gain reduction is released. +Default is 0.125. Allowed range is from 0 to 1. + +@item ratio +Set a ratio about which the signal is reduced. +Default is 2. Allowed range is from 1 to 9000. + +@item attack +Amount of milliseconds the signal has to rise above the threshold before gain +reduction stops. +Default is 20 milliseconds. Allowed range is from 0.01 to 9000. + +@item release +Amount of milliseconds the signal has to fall below the threshold before the +reduction is increased again. Default is 250 milliseconds. +Allowed range is from 0.01 to 9000. + +@item makeup +Set amount of amplification of signal after processing. +Default is 1. Allowed range is from 1 to 64. + +@item knee +Curve the sharp knee around the threshold to enter gain reduction more softly. +Default is 2.828427125. Allowed range is from 1 to 8. + +@item detection +Choose if exact signal should be taken for detection or an RMS like one. +Default is rms. Can be peak or rms. 
+ +@item link +Choose if the average level between all channels or the louder channel affects +the reduction. +Default is average. Can be average or maximum. + +@item level_sc +Set sidechain gain. Default is 1. Range is from 0.015625 to 64. +@end table -Add paddings to the input image, and place the original input at the -provided @var{x}, @var{y} coordinates. +@section silencedetect -It accepts the following parameters: +Detect silence in an audio stream. -@table @option -@item width, height +This filter logs a message when it detects that the input audio volume is less +or equal to a noise tolerance value for a duration greater or equal to the +minimum detected noise duration. -Specify the size of the output image with the paddings added. If the -value for @var{width} or @var{height} is 0, the corresponding input size -is used for the output. +The printed times and duration are expressed in seconds. -The @var{width} expression can reference the value set by the -@var{height} expression, and vice versa. +The filter accepts the following options: -The default value of @var{width} and @var{height} is 0. +@table @option +@item duration, d +Set silence duration until notification (default is 2 seconds). -@item x, y +@item noise, n +Set noise tolerance. Can be specified in dB (in case "dB" is appended to the +specified value) or amplitude ratio. Default is -60dB, or 0.001. +@end table -Specify the offsets to place the input image at within the padded area, -with respect to the top/left border of the output image. +@subsection Examples -The @var{x} expression can reference the value set by the @var{y} -expression, and vice versa. +@itemize +@item +Detect 5 seconds of silence with -50dB noise tolerance: +@example +silencedetect=n=-50dB:d=5 +@end example -The default value of @var{x} and @var{y} is 0. 
+@item +Complete example with @command{ffmpeg} to detect silence with 0.0001 noise +tolerance in @file{silence.mp3}: +@example +ffmpeg -i silence.mp3 -af silencedetect=noise=0.0001 -f null - +@end example +@end itemize -@item color +@section silenceremove -Specify the color of the padded area. It can be the name of a color -(case insensitive match) or an 0xRRGGBB[AA] sequence. +Remove silence from the beginning, middle or end of the audio. -The default value of @var{color} is "black". +The filter accepts the following options: +@table @option +@item start_periods +This value is used to indicate if audio should be trimmed at beginning of +the audio. A value of zero indicates no silence should be trimmed from the +beginning. When specifying a non-zero value, it trims audio up until it +finds non-silence. Normally, when trimming silence from beginning of audio +the @var{start_periods} will be @code{1} but it can be increased to higher +values to trim all audio up to specific count of non-silence periods. +Default value is @code{0}. + +@item start_duration +Specify the amount of time that non-silence must be detected before it stops +trimming audio. By increasing the duration, bursts of noises can be treated +as silence and trimmed off. Default value is @code{0}. + +@item start_threshold +This indicates what sample value should be treated as silence. For digital +audio, a value of @code{0} may be fine but for audio recorded from analog, +you may wish to increase the value to account for background noise. +Can be specified in dB (in case "dB" is appended to the specified value) +or amplitude ratio. Default value is @code{0}. + +@item stop_periods +Set the count for trimming silence from the end of audio. +To remove silence from the middle of a file, specify a @var{stop_periods} +that is negative. 
This value is then treated as a positive value and is +used to indicate the effect should restart processing as specified by +@var{start_periods}, making it suitable for removing periods of silence +in the middle of the audio. +Default value is @code{0}. + +@item stop_duration +Specify a duration of silence that must exist before audio is not copied any +more. By specifying a higher duration, silence that is wanted can be left in +the audio. +Default value is @code{0}. + +@item stop_threshold +This is the same as @option{start_threshold} but for trimming silence from +the end of audio. +Can be specified in dB (in case "dB" is appended to the specified value) +or amplitude ratio. Default value is @code{0}. + +@item leave_silence +This indicates that @var{stop_duration} length of audio should be left intact +at the beginning of each period of silence. +For example, if you want to remove long pauses between words but do not want +to remove the pauses completely. Default value is @code{0}. + +@item detection +Set how silence is detected. Can be @code{rms} or @code{peak}. Second is faster +and works better with digital silence which is exactly 0. +Default value is @code{rms}. + +@item window +Set ratio used to calculate size of window for detecting silence. +Default value is @code{0.02}. Allowed range is from @code{0} to @code{10}. @end table -The parameters @var{width}, @var{height}, @var{x}, and @var{y} are -expressions containing the following constants: +@subsection Examples -@table @option -@item E, PI, PHI -These are approximated values for the mathematical constants e -(Euler's number), pi (Greek pi), and phi (the golden ratio). +@itemize +@item +The following example shows how this filter can be used to start a recording +that does not contain the delay at the start which usually occurs between +pressing the record button and the start of the performance: +@example +silenceremove=1:5:0.02 +@end example -@item in_w, in_h -The input video width and height. 
+@item +Trim all silence encountered from beginning to end where there is more than 1 +second of silence in audio: +@example +silenceremove=0:0:0:-1:1:-90dB +@end example +@end itemize -@item iw, ih -These are the same as @var{in_w} and @var{in_h}. +@section sofalizer -@item out_w, out_h -The output width and height (the size of the padded area), as -specified by the @var{width} and @var{height} expressions. +SOFAlizer uses head-related transfer functions (HRTFs) to create virtual +loudspeakers around the user for binaural listening via headphones (audio +formats up to 9 channels supported). +The HRTFs are stored in SOFA files (see @url{http://www.sofacoustics.org/} for a database). +SOFAlizer is developed at the Acoustics Research Institute (ARI) of the +Austrian Academy of Sciences. -@item ow, oh -These are the same as @var{out_w} and @var{out_h}. +To enable compilation of this filter you need to configure FFmpeg with +@code{--enable-netcdf}. -@item x, y -The x and y offsets as specified by the @var{x} and @var{y} -expressions, or NAN if not yet specified. +The filter accepts the following options: -@item a -The input display aspect ratio, same as @var{iw} / @var{ih}. +@table @option +@item sofa +Set the SOFA file used for rendering. -@item hsub, vsub -The horizontal and vertical chroma subsample values. For example for the -pixel format "yuv422p" @var{hsub} is 2 and @var{vsub} is 1. -@end table +@item gain +Set gain applied to audio. Value is in dB. Default is 0. -Some examples: +@item rotation +Set rotation of virtual loudspeakers in deg. Default is 0. -@example -# Add paddings with the color "violet" to the input video. The output video -# size is 640x480, and the top-left corner of the input video is placed at -# column 0, row 40 -pad=width=640:height=480:x=0:y=40:color=violet +@item elevation +Set elevation of virtual speakers in deg. Default is 0. 
-# Pad the input to get an output with dimensions increased by 3/2, -# and put the input video at the center of the padded area -pad="3/2*iw:3/2*ih:(ow-iw)/2:(oh-ih)/2" +@item radius +Set distance in meters between loudspeakers and the listener with near-field +HRTFs. Default is 1. -# Pad the input to get a squared output with size equal to the maximum -# value between the input width and height, and put the input video at -# the center of the padded area -pad="max(iw\,ih):ow:(ow-iw)/2:(oh-ih)/2" +@item type +Set processing type. Can be @var{time} or @var{freq}. @var{time} is +processing audio in time domain which is slow. +@var{freq} is processing audio in frequency domain which is fast. +Default is @var{freq}. + +@item speakers +Set custom positions of virtual loudspeakers. Syntax for this option is: +<CH> <AZIM> <ELEV>[|<CH> <AZIM> <ELEV>|...]. +Each virtual loudspeaker is described with short channel name followed by +azimuth and elevation in degrees. +Each virtual loudspeaker description is separated by '|'. +For example to override front left and front right channel positions use: +'speakers=FL 45 15|FR 345 15'. +Descriptions with unrecognised channel names are ignored. +@end table -# Pad the input to get a final w/h ratio of 16:9 -pad="ih*16/9:ih:(ow-iw)/2:(oh-ih)/2" +@subsection Examples -# Double the output size and put the input video in the bottom-right -# corner of the output padded area -pad="2*iw:2*ih:ow-iw:oh-ih" +@itemize +@item +Using ClubFritz6 sofa file: +@example +sofalizer=sofa=/path/to/ClubFritz6.sofa:type=freq:radius=1 @end example -@section pixdesctest - -Pixel format descriptor test filter, mainly useful for internal -testing. The output video should be equal to the input video. - -For example: +@item +Using ClubFritz12 sofa file and bigger radius with small rotation: @example -format=monow, pixdesctest +sofalizer=sofa=/path/to/ClubFritz12.sofa:type=freq:radius=2:rotation=5 @end example -can be used to test the monowhite pixel format descriptor definition. 
+@item +Similar as above but with custom speaker positions for front left, front right, rear left and rear right +and also with custom gain: +@example +"sofalizer=sofa=/path/to/ClubFritz6.sofa:type=freq:radius=2:speakers=FL 45|FR 315|RL 135|RR 225:gain=28" +@end example +@end itemize -@anchor{scale} -@section scale +@section stereotools -Scale the input video and/or convert the image format. +This filter has some handy utilities to manage stereo signals, for converting +M/S stereo recordings to L/R signal while having control over the parameters +or spreading the stereo image of master track. -It accepts the following parameters: +The filter accepts the following options: @table @option +@item level_in +Set input level before filtering for both channels. Default is 1. +Allowed range is from 0.015625 to 64. -@item w -The output video width. +@item level_out +Set output level after filtering for both channels. Default is 1. +Allowed range is from 0.015625 to 64. -@item h -The output video height. +@item balance_in +Set input balance between both channels. Default is 0. +Allowed range is from -1 to 1. -@end table +@item balance_out +Set output balance between both channels. Default is 0. +Allowed range is from -1 to 1. -The parameters @var{w} and @var{h} are expressions containing -the following constants: +@item softclip +Enable softclipping. Results in analog distortion instead of harsh digital 0dB +clipping. Disabled by default. -@table @option -@item E, PI, PHI -These are approximated values for the mathematical constants e -(Euler's number), pi (Greek pi), and phi (the golden ratio). +@item mutel +Mute the left channel. Disabled by default. -@item in_w, in_h -The input width and height. +@item muter +Mute the right channel. Disabled by default. -@item iw, ih -These are the same as @var{in_w} and @var{in_h}. +@item phasel +Change the phase of the left channel. Disabled by default. -@item out_w, out_h -The output (cropped) width and height. 
+@item phaser +Change the phase of the right channel. Disabled by default. -@item ow, oh -These are the same as @var{out_w} and @var{out_h}. +@item mode +Set stereo mode. Available values are: -@item a -This is the same as @var{iw} / @var{ih}. +@table @samp +@item lr>lr +Left/Right to Left/Right, this is default. -@item sar -input sample aspect ratio +@item lr>ms +Left/Right to Mid/Side. -@item dar -The input display aspect ratio; it is the same as -(@var{iw} / @var{ih}) * @var{sar}. +@item ms>lr +Mid/Side to Left/Right. -@item hsub, vsub -The horizontal and vertical chroma subsample values. For example, for the -pixel format "yuv422p" @var{hsub} is 2 and @var{vsub} is 1. +@item lr>ll +Left/Right to Left/Left. + +@item lr>rr +Left/Right to Right/Right. + +@item lr>l+r +Left/Right to Left + Right. + +@item lr>rl +Left/Right to Right/Left. @end table -If the input image format is different from the format requested by -the next filter, the scale filter will convert the input to the -requested format. +@item slev +Set level of side signal. Default is 1. +Allowed range is from 0.015625 to 64. -If the value for @var{w} or @var{h} is 0, the respective input -size is used for the output. +@item sbal +Set balance of side signal. Default is 0. +Allowed range is from -1 to 1. -If the value for @var{w} or @var{h} is -1, the scale filter will use, for the -respective output size, a value that maintains the aspect ratio of the input -image. +@item mlev +Set level of the middle signal. Default is 1. +Allowed range is from 0.015625 to 64. -The default value of @var{w} and @var{h} is 0. +@item mpan +Set middle signal pan. Default is 0. Allowed range is from -1 to 1. -Some examples: +@item base +Set stereo base between mono and inversed channels. Default is 0. +Allowed range is from -1 to 1. + +@item delay +Set delay in milliseconds how much to delay left from right channel and +vice versa. Default is 0. Allowed range is from -20 to 20. + +@item sclevel +Set S/C level. 
Default is 1. Allowed range is from 1 to 100. + +@item phase +Set the stereo phase in degrees. Default is 0. Allowed range is from 0 to 360. +@end table + +@subsection Examples + +@itemize +@item +Apply karaoke like effect: @example -# Scale the input video to a size of 200x100 -scale=w=200:h=100 +stereotools=mlev=0.015625 +@end example -# Scale the input to 2x -scale=w=2*iw:h=2*ih -# The above is the same as -scale=2*in_w:2*in_h +@item +Convert M/S signal to L/R: +@example +"stereotools=mode=ms>lr" +@end example +@end itemize -# Scale the input to half the original size -scale=w=iw/2:h=ih/2 +@section stereowiden -# Increase the width, and set the height to the same size -scale=3/2*iw:ow +This filter enhances the stereo effect by suppressing signal common to both +channels and by delaying the signal of left into right and vice versa, +thereby widening the stereo effect. -# Seek Greek harmony -scale=iw:1/PHI*iw -scale=ih*PHI:ih +The filter accepts the following options: -# Increase the height, and set the width to 3/2 of the height -scale=w=3/2*oh:h=3/5*ih +@table @option +@item delay +Time in milliseconds of the delay of left signal into right and vice versa. +Default is 20 milliseconds. -# Increase the size, making the size a multiple of the chroma -scale="trunc(3/2*iw/hsub)*hsub:trunc(3/2*ih/vsub)*vsub" +@item feedback +Amount of gain in delayed signal into right and vice versa. Gives a delay +effect of left signal in right output and vice versa which gives widening +effect. Default is 0.3. -# Increase the width to a maximum of 500 pixels, -# keeping the same aspect ratio as the input -scale=w='min(500\, iw*3/2):h=-1' -@end example +@item crossfeed +Cross feed of left into right with inverted phase. This helps in suppressing +the mono. If the value is 1 it will cancel all the signal common to both +channels. Default is 0.3. + +@item drymix +Set level of input signal of original channel. Default is 0.8. 
+@end table + @section scale_npp + + Use the NVIDIA Performance Primitives (libnpp) to perform scaling and/or pixel + format conversion on CUDA video frames. Setting the output width and height + works in the same way as for the @var{scale} filter. + + The following additional options are accepted: + @table @option + @item format + The pixel format of the output CUDA frames. If set to the string "same" (the + default), the input format will be kept. Note that automatic format negotiation + and conversion is not yet supported for hardware frames + + @item interp_algo + The interpolation algorithm used for resizing. One of the following: + @table @option + @item nn + Nearest neighbour. + + @item linear + @item cubic + @item cubic2p_bspline + 2-parameter cubic (B=1, C=0) + + @item cubic2p_catmullrom + 2-parameter cubic (B=0, C=1/2) + + @item cubic2p_b05c03 + 2-parameter cubic (B=1/2, C=3/10) + + @item super + Supersampling + + @item lanczos + @end table + + @end table + + @section select + Select frames to pass in output. + -It accepts the following parameters: +@section treble + +Boost or cut treble (upper) frequencies of the audio using a two-pole +shelving filter with a response similar to that of a standard +hi-fi's tone-controls. This is also known as shelving equalisation (EQ). + +The filter accepts the following options: @table @option +@item gain, g +Give the gain at whichever is the lower of ~22 kHz and the +Nyquist frequency. Its useful range is about -20 (for a large cut) +to +20 (for a large boost). Beware of clipping when using a positive gain. + +@item frequency, f +Set the filter's central frequency and so can be used +to extend or reduce the frequency range to be boosted or cut. +The default value is @code{3000} Hz. + +@item width_type +Set method to specify band-width of filter. +@table @option +@item h +Hz +@item q +Q-Factor +@item o +octave +@item s +slope +@end table -@item expr -An expression, which is evaluated for each input frame. 
If the expression is -evaluated to a non-zero value, the frame is selected and passed to the output, -otherwise it is discarded. +@item width, w +Determine how steep is the filter's shelf transition. +@end table +@section tremolo + +Sinusoidal amplitude modulation. + +The filter accepts the following options: + +@table @option +@item f +Modulation frequency in Hertz. Modulation frequencies in the subharmonic range +(20 Hz or lower) will result in a tremolo effect. +This filter may also be used as a ring modulator by specifying +a modulation frequency higher than 20 Hz. +Range is 0.1 - 20000.0. Default value is 5.0 Hz. + +@item d +Depth of modulation as a percentage. Range is 0.0 - 1.0. +Default value is 0.5. @end table -The expression can contain the following constants: +@section vibrato + +Sinusoidal phase modulation. + +The filter accepts the following options: @table @option -@item E, PI, PHI -These are approximated values for the mathematical constants e -(Euler's number), pi (Greek pi), and phi (the golden ratio). +@item f +Modulation frequency in Hertz. +Range is 0.1 - 20000.0. Default value is 5.0 Hz. -@item n -The (sequential) number of the filtered frame, starting from 0. +@item d +Depth of modulation as a percentage. Range is 0.0 - 1.0. +Default value is 0.5. +@end table -@item selected_n -The (sequential) number of the selected frame, starting from 0. +@section volume -@item prev_selected_n -The sequential number of the last selected frame. It's NAN if undefined. +Adjust the input audio volume. -@item TB -The timebase of the input timestamps. +It accepts the following parameters: +@table @option -@item pts -The PTS (Presentation TimeStamp) of the filtered video frame, -expressed in @var{TB} units. It's NAN if undefined. +@item volume +Set audio volume expression. -@item t -The PTS of the filtered video frame, -expressed in seconds. It's NAN if undefined. +Output values are clipped to the maximum value. 
-@item prev_pts -The PTS of the previously filtered video frame. It's NAN if undefined. +The output audio volume is given by the relation: +@example +@var{output_volume} = @var{volume} * @var{input_volume} +@end example -@item prev_selected_pts -The PTS of the last previously filtered video frame. It's NAN if undefined. +The default value for @var{volume} is "1.0". -@item prev_selected_t -The PTS of the last previously selected video frame. It's NAN if undefined. +@item precision +This parameter represents the mathematical precision. -@item start_pts -The PTS of the first video frame in the video. It's NAN if undefined. +It determines which input sample formats will be allowed, which affects the +precision of the volume scaling. -@item start_t -The time of the first video frame in the video. It's NAN if undefined. +@table @option +@item fixed +8-bit fixed-point; this limits input sample format to U8, S16, and S32. +@item float +32-bit floating-point; this limits input sample format to FLT. (default) +@item double +64-bit floating-point; this limits input sample format to DBL. +@end table + +@item replaygain +Choose the behaviour on encountering ReplayGain side data in input frames. -@item pict_type -The type of the filtered frame. It can assume one of the following -values: @table @option -@item I -@item P -@item B -@item S -@item SI -@item SP -@item BI +@item drop +Remove ReplayGain side data, ignoring its contents (the default). + +@item ignore +Ignore ReplayGain side data, but leave it in the frame. + +@item track +Prefer the track gain, if present. + +@item album +Prefer the album gain, if present. @end table -@item interlace_type -The frame interlace type. It can assume one of the following values: +@item replaygain_preamp +Pre-amplification gain in dB to apply to the selected replaygain gain. + +Default value for @var{replaygain_preamp} is 0.0. + +@item eval +Set when the volume expression is evaluated. 
+ +It accepts the following values: +@table @samp +@item once +only evaluate expression once during the filter initialization, or +when the @samp{volume} command is sent + +@item frame +evaluate expression for each incoming frame +@end table + +Default value is @samp{once}. +@end table + +The volume expression can contain the following parameters. + @table @option -@item PROGRESSIVE -The frame is progressive (not interlaced). -@item TOPFIRST -The frame is top-field-first. -@item BOTTOMFIRST -The frame is bottom-field-first. +@item n +frame number (starting at zero) +@item nb_channels +number of channels +@item nb_consumed_samples +number of samples consumed by the filter +@item nb_samples +number of samples in the current frame +@item pos +original frame position in the file +@item pts +frame PTS +@item sample_rate +sample rate +@item startpts +PTS at start of stream +@item startt +time at start of stream +@item t +frame time +@item tb +timestamp timebase +@item volume +last set volume value @end table -@item key -This is 1 if the filtered frame is a key-frame, 0 otherwise. +Note that when @option{eval} is set to @samp{once} only the +@var{sample_rate} and @var{tb} variables are available, all other +variables will evaluate to NAN. + +@subsection Commands + +This filter supports the following commands: +@table @option +@item volume +Modify the volume expression. +The command accepts the same syntax of the corresponding option. + +If the specified expression is not valid, it is kept at its current +value. +@item replaygain_noclip +Prevent clipping by limiting the gain applied. + +Default value for @var{replaygain_noclip} is 1. 
@end table diff --cc libavfilter/Makefile index d71a17b790,39e167faab..917049c371 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@@ -199,52 -63,18 +199,53 @@@ OBJS-$(CONFIG_LUT3D_FILTER OBJS-$(CONFIG_LUT_FILTER) += vf_lut.o OBJS-$(CONFIG_LUTRGB_FILTER) += vf_lut.o OBJS-$(CONFIG_LUTYUV_FILTER) += vf_lut.o +OBJS-$(CONFIG_MASKEDMERGE_FILTER) += vf_maskedmerge.o framesync.o +OBJS-$(CONFIG_MCDEINT_FILTER) += vf_mcdeint.o +OBJS-$(CONFIG_MERGEPLANES_FILTER) += vf_mergeplanes.o framesync.o +OBJS-$(CONFIG_METADATA_FILTER) += f_metadata.o +OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o +OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o +OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o OBJS-$(CONFIG_NULL_FILTER) += vf_null.o +OBJS-$(CONFIG_OCR_FILTER) += vf_ocr.o OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o -OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o +OBJS-$(CONFIG_OPENCL) += deshake_opencl.o unsharp_opencl.o +OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o dualinput.o framesync.o +OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o +OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o +OBJS-$(CONFIG_PALETTEUSE_FILTER) += vf_paletteuse.o dualinput.o framesync.o +OBJS-$(CONFIG_PERMS_FILTER) += f_perms.o +OBJS-$(CONFIG_PERSPECTIVE_FILTER) += vf_perspective.o +OBJS-$(CONFIG_PHASE_FILTER) += vf_phase.o OBJS-$(CONFIG_PIXDESCTEST_FILTER) += vf_pixdesctest.o +OBJS-$(CONFIG_PP_FILTER) += vf_pp.o +OBJS-$(CONFIG_PP7_FILTER) += vf_pp7.o +OBJS-$(CONFIG_PSNR_FILTER) += vf_psnr.o dualinput.o framesync.o +OBJS-$(CONFIG_PULLUP_FILTER) += vf_pullup.o +OBJS-$(CONFIG_QP_FILTER) += vf_qp.o +OBJS-$(CONFIG_RANDOM_FILTER) += vf_random.o +OBJS-$(CONFIG_READVITC_FILTER) += vf_readvitc.o +OBJS-$(CONFIG_REALTIME_FILTER) += f_realtime.o +OBJS-$(CONFIG_REMAP_FILTER) += vf_remap.o framesync.o +OBJS-$(CONFIG_REMOVEGRAIN_FILTER) += vf_removegrain.o +OBJS-$(CONFIG_REMOVELOGO_FILTER) += 
bbox.o lswsutils.o lavfutils.o vf_removelogo.o +OBJS-$(CONFIG_REPEATFIELDS_FILTER) += vf_repeatfields.o +OBJS-$(CONFIG_REVERSE_FILTER) += f_reverse.o +OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o +OBJS-$(CONFIG_SEPARATEFIELDS_FILTER) += vf_separatefields.o +OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o +OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o +OBJS-$(CONFIG_SELECT_FILTER) += f_select.o +OBJS-$(CONFIG_SELECTIVECOLOR_FILTER) += vf_selectivecolor.o +OBJS-$(CONFIG_SENDCMD_FILTER) += f_sendcmd.o + OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o -OBJS-$(CONFIG_SELECT_FILTER) += vf_select.o OBJS-$(CONFIG_SETDAR_FILTER) += vf_aspect.o +OBJS-$(CONFIG_SETFIELD_FILTER) += vf_setfield.o OBJS-$(CONFIG_SETPTS_FILTER) += setpts.o OBJS-$(CONFIG_SETSAR_FILTER) += vf_aspect.o OBJS-$(CONFIG_SETTB_FILTER) += settb.o diff --cc libavfilter/allfilters.c index 7f58c7e132,c6eeb1f4e4..a9725760fa --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@@ -220,51 -89,18 +220,52 @@@ void avfilter_register_all(void REGISTER_FILTER(LUT, lut, vf); REGISTER_FILTER(LUTRGB, lutrgb, vf); REGISTER_FILTER(LUTYUV, lutyuv, vf); + REGISTER_FILTER(MASKEDMERGE, maskedmerge, vf); + REGISTER_FILTER(MCDEINT, mcdeint, vf); + REGISTER_FILTER(MERGEPLANES, mergeplanes, vf); + REGISTER_FILTER(METADATA, metadata, vf); + REGISTER_FILTER(MPDECIMATE, mpdecimate, vf); REGISTER_FILTER(NEGATE, negate, vf); + REGISTER_FILTER(NNEDI, nnedi, vf); REGISTER_FILTER(NOFORMAT, noformat, vf); + REGISTER_FILTER(NOISE, noise, vf); REGISTER_FILTER(NULL, null, vf); + REGISTER_FILTER(OCR, ocr, vf); REGISTER_FILTER(OCV, ocv, vf); REGISTER_FILTER(OVERLAY, overlay, vf); + REGISTER_FILTER(OWDENOISE, owdenoise, vf); REGISTER_FILTER(PAD, pad, vf); + REGISTER_FILTER(PALETTEGEN, palettegen, vf); + REGISTER_FILTER(PALETTEUSE, paletteuse, vf); + REGISTER_FILTER(PERMS, perms, vf); + REGISTER_FILTER(PERSPECTIVE, perspective, vf); + 
REGISTER_FILTER(PHASE, phase, vf); REGISTER_FILTER(PIXDESCTEST, pixdesctest, vf); + REGISTER_FILTER(PP, pp, vf); + REGISTER_FILTER(PP7, pp7, vf); + REGISTER_FILTER(PSNR, psnr, vf); + REGISTER_FILTER(PULLUP, pullup, vf); + REGISTER_FILTER(QP, qp, vf); + REGISTER_FILTER(RANDOM, random, vf); + REGISTER_FILTER(READVITC, readvitc, vf); + REGISTER_FILTER(REALTIME, realtime, vf); + REGISTER_FILTER(REMAP, remap, vf); + REGISTER_FILTER(REMOVEGRAIN, removegrain, vf); + REGISTER_FILTER(REMOVELOGO, removelogo, vf); + REGISTER_FILTER(REPEATFIELDS, repeatfields, vf); + REGISTER_FILTER(REVERSE, reverse, vf); + REGISTER_FILTER(ROTATE, rotate, vf); + REGISTER_FILTER(SAB, sab, vf); REGISTER_FILTER(SCALE, scale, vf); + REGISTER_FILTER(SCALE2REF, scale2ref, vf); + REGISTER_FILTER(SCALE_NPP, scale_npp, vf); REGISTER_FILTER(SCALE_VAAPI, scale_vaapi, vf); REGISTER_FILTER(SELECT, select, vf); + REGISTER_FILTER(SELECTIVECOLOR, selectivecolor, vf); + REGISTER_FILTER(SENDCMD, sendcmd, vf); + REGISTER_FILTER(SEPARATEFIELDS, separatefields, vf); REGISTER_FILTER(SETDAR, setdar, vf); + REGISTER_FILTER(SETFIELD, setfield, vf); REGISTER_FILTER(SETPTS, setpts, vf); REGISTER_FILTER(SETSAR, setsar, vf); REGISTER_FILTER(SETTB, settb, vf); diff --cc libavfilter/version.h index 927ec27e55,c8102dd7d0..d7f9c546a9 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@@ -29,9 -29,9 +29,9 @@@ #include "libavutil/version.h" -#define LIBAVFILTER_VERSION_MAJOR 6 -#define LIBAVFILTER_VERSION_MINOR 4 -#define LIBAVFILTER_VERSION_MICRO 0 +#define LIBAVFILTER_VERSION_MAJOR 6 - #define LIBAVFILTER_VERSION_MINOR 43 - #define LIBAVFILTER_VERSION_MICRO 101 ++#define LIBAVFILTER_VERSION_MINOR 44 ++#define LIBAVFILTER_VERSION_MICRO 100 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ LIBAVFILTER_VERSION_MINOR, \ diff --cc libavfilter/vf_scale_npp.c index 0000000000,247baf1133..7d2b5df233 mode 000000,100644..100644 --- a/libavfilter/vf_scale_npp.c +++ b/libavfilter/vf_scale_npp.c @@@ -1,0 
-1,660 +1,660 @@@ + /* - * This file is part of Libav. ++ * This file is part of FFmpeg. + * - * Libav is free software; you can redistribute it and/or ++ * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * - * Libav is distributed in the hope that it will be useful, ++ * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software ++ * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + /** + * @file + * scale video filter + */ + + #include + #include + #include + + #include "libavutil/avstring.h" + #include "libavutil/common.h" + #include "libavutil/eval.h" + #include "libavutil/hwcontext.h" + #include "libavutil/hwcontext_cuda.h" + #include "libavutil/internal.h" + #include "libavutil/mathematics.h" + #include "libavutil/opt.h" + #include "libavutil/pixdesc.h" + + #include "avfilter.h" + #include "formats.h" + #include "internal.h" + #include "video.h" + + static const enum AVPixelFormat supported_formats[] = { + AV_PIX_FMT_YUV420P, + AV_PIX_FMT_NV12, + AV_PIX_FMT_YUV444P, + }; + + static const enum AVPixelFormat deinterleaved_formats[][2] = { + { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P }, + }; + + static const char *const var_names[] = { + "PI", + "PHI", + "E", + "in_w", "iw", + "in_h", "ih", + "out_w", "ow", + "out_h", "oh", + "a", "dar", + "sar", + NULL + }; + + enum var_name { + VAR_PI, + VAR_PHI, + VAR_E, + VAR_IN_W, VAR_IW, + VAR_IN_H, 
VAR_IH, + VAR_OUT_W, VAR_OW, + VAR_OUT_H, VAR_OH, + VAR_A, VAR_DAR, + VAR_SAR, + VARS_NB + }; + + enum ScaleStage { + STAGE_DEINTERLEAVE, + STAGE_RESIZE, + STAGE_INTERLEAVE, + STAGE_NB, + }; + + typedef struct NPPScaleStageContext { + int stage_needed; + enum AVPixelFormat in_fmt; + enum AVPixelFormat out_fmt; + + struct { + int width; + int height; + } planes_in[3], planes_out[3]; + + AVBufferRef *frames_ctx; + AVFrame *frame; + } NPPScaleStageContext; + + typedef struct NPPScaleContext { + const AVClass *class; + + NPPScaleStageContext stages[STAGE_NB]; + AVFrame *tmp_frame; + int passthrough; + + int shift_width, shift_height; + + /** + * New dimensions. Special values are: + * 0 = original width/height + * -1 = keep original aspect + */ + int w, h; + + /** + * Output sw format. AV_PIX_FMT_NONE for no conversion. + */ + enum AVPixelFormat format; + + char *w_expr; ///< width expression string + char *h_expr; ///< height expression string + char *format_str; + + int interp_algo; + } NPPScaleContext; + + static int nppscale_init(AVFilterContext *ctx) + { + NPPScaleContext *s = ctx->priv; + int i; + + if (!strcmp(s->format_str, "same")) { + s->format = AV_PIX_FMT_NONE; + } else { + s->format = av_get_pix_fmt(s->format_str); + if (s->format == AV_PIX_FMT_NONE) { + av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); + return AVERROR(EINVAL); + } + } + + for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) { + s->stages[i].frame = av_frame_alloc(); + if (!s->stages[i].frame) + return AVERROR(ENOMEM); + } + s->tmp_frame = av_frame_alloc(); + if (!s->tmp_frame) + return AVERROR(ENOMEM); + + return 0; + } + + static void nppscale_uninit(AVFilterContext *ctx) + { + NPPScaleContext *s = ctx->priv; + int i; + + for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) { + av_frame_free(&s->stages[i].frame); + av_buffer_unref(&s->stages[i].frames_ctx); + } + av_frame_free(&s->tmp_frame); + } + + static int nppscale_query_formats(AVFilterContext *ctx) + { + static 
const enum AVPixelFormat pixel_formats[] = { + AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE, + }; + AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); + + ff_set_common_formats(ctx, pix_fmts); + + return 0; + } + + static int init_stage(NPPScaleStageContext *stage, AVBufferRef *device_ctx) + { + AVBufferRef *out_ref = NULL; + AVHWFramesContext *out_ctx; + int in_sw, in_sh, out_sw, out_sh; + int ret, i; + + av_pix_fmt_get_chroma_sub_sample(stage->in_fmt, &in_sw, &in_sh); + av_pix_fmt_get_chroma_sub_sample(stage->out_fmt, &out_sw, &out_sh); + if (!stage->planes_out[0].width) { + stage->planes_out[0].width = stage->planes_in[0].width; + stage->planes_out[0].height = stage->planes_in[0].height; + } + + for (i = 1; i < FF_ARRAY_ELEMS(stage->planes_in); i++) { + stage->planes_in[i].width = stage->planes_in[0].width >> in_sw; + stage->planes_in[i].height = stage->planes_in[0].height >> in_sh; + stage->planes_out[i].width = stage->planes_out[0].width >> out_sw; + stage->planes_out[i].height = stage->planes_out[0].height >> out_sh; + } + + out_ref = av_hwframe_ctx_alloc(device_ctx); + if (!out_ref) + return AVERROR(ENOMEM); + out_ctx = (AVHWFramesContext*)out_ref->data; + + out_ctx->format = AV_PIX_FMT_CUDA; + out_ctx->sw_format = stage->out_fmt; + out_ctx->width = FFALIGN(stage->planes_out[0].width, 32); + out_ctx->height = FFALIGN(stage->planes_out[0].height, 32); + + ret = av_hwframe_ctx_init(out_ref); + if (ret < 0) + goto fail; + + av_frame_unref(stage->frame); + ret = av_hwframe_get_buffer(out_ref, stage->frame, 0); + if (ret < 0) + goto fail; + + stage->frame->width = stage->planes_out[0].width; + stage->frame->height = stage->planes_out[0].height; + + av_buffer_unref(&stage->frames_ctx); + stage->frames_ctx = out_ref; + + return 0; + fail: + av_buffer_unref(&out_ref); + return ret; + } + + static int format_is_supported(enum AVPixelFormat fmt) + { + int i; + + for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) + if (supported_formats[i] == fmt) + return 1; + 
return 0; + } + + static enum AVPixelFormat get_deinterleaved_format(enum AVPixelFormat fmt) + { + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); + int i, planes; + + planes = av_pix_fmt_count_planes(fmt); + if (planes == desc->nb_components) + return fmt; + for (i = 0; i < FF_ARRAY_ELEMS(deinterleaved_formats); i++) + if (deinterleaved_formats[i][0] == fmt) + return deinterleaved_formats[i][1]; + return AV_PIX_FMT_NONE; + } + + static int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height, + int out_width, int out_height) + { + NPPScaleContext *s = ctx->priv; + + AVHWFramesContext *in_frames_ctx; + + enum AVPixelFormat in_format; + enum AVPixelFormat out_format; + enum AVPixelFormat in_deinterleaved_format; + enum AVPixelFormat out_deinterleaved_format; + + int i, ret, last_stage = -1; + + /* check that we have a hw context */ + if (!ctx->inputs[0]->hw_frames_ctx) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; + in_format = in_frames_ctx->sw_format; + out_format = (s->format == AV_PIX_FMT_NONE) ? 
in_format : s->format; + + if (!format_is_supported(in_format)) { + av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", + av_get_pix_fmt_name(in_format)); + return AVERROR(ENOSYS); + } + if (!format_is_supported(out_format)) { + av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", + av_get_pix_fmt_name(out_format)); + return AVERROR(ENOSYS); + } + + in_deinterleaved_format = get_deinterleaved_format(in_format); + out_deinterleaved_format = get_deinterleaved_format(out_format); + if (in_deinterleaved_format == AV_PIX_FMT_NONE || + out_deinterleaved_format == AV_PIX_FMT_NONE) + return AVERROR_BUG; + + /* figure out which stages need to be done */ + if (in_width != out_width || in_height != out_height || + in_deinterleaved_format != out_deinterleaved_format) + s->stages[STAGE_RESIZE].stage_needed = 1; + + if (!s->stages[STAGE_RESIZE].stage_needed && in_format == out_format) + s->passthrough = 1; + + if (!s->passthrough) { + if (in_format != in_deinterleaved_format) + s->stages[STAGE_DEINTERLEAVE].stage_needed = 1; + if (out_format != out_deinterleaved_format) + s->stages[STAGE_INTERLEAVE].stage_needed = 1; + } + + s->stages[STAGE_DEINTERLEAVE].in_fmt = in_format; + s->stages[STAGE_DEINTERLEAVE].out_fmt = in_deinterleaved_format; + s->stages[STAGE_DEINTERLEAVE].planes_in[0].width = in_width; + s->stages[STAGE_DEINTERLEAVE].planes_in[0].height = in_height; + + s->stages[STAGE_RESIZE].in_fmt = in_deinterleaved_format; + s->stages[STAGE_RESIZE].out_fmt = out_deinterleaved_format; + s->stages[STAGE_RESIZE].planes_in[0].width = in_width; + s->stages[STAGE_RESIZE].planes_in[0].height = in_height; + s->stages[STAGE_RESIZE].planes_out[0].width = out_width; + s->stages[STAGE_RESIZE].planes_out[0].height = out_height; + + s->stages[STAGE_INTERLEAVE].in_fmt = out_deinterleaved_format; + s->stages[STAGE_INTERLEAVE].out_fmt = out_format; + s->stages[STAGE_INTERLEAVE].planes_in[0].width = out_width; + s->stages[STAGE_INTERLEAVE].planes_in[0].height = out_height; + + 
/* init the hardware contexts */ + for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) { + if (!s->stages[i].stage_needed) + continue; + + ret = init_stage(&s->stages[i], in_frames_ctx->device_ref); + if (ret < 0) + return ret; + + last_stage = i; + } + + if (last_stage < 0) + return AVERROR_BUG; + ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->stages[last_stage].frames_ctx); + if (!ctx->outputs[0]->hw_frames_ctx) + return AVERROR(ENOMEM); + + return 0; + } + + static int nppscale_config_props(AVFilterLink *outlink) + { + AVFilterContext *ctx = outlink->src; + AVFilterLink *inlink = outlink->src->inputs[0]; + NPPScaleContext *s = ctx->priv; + int64_t w, h; + double var_values[VARS_NB], res; + char *expr; + int ret; + + var_values[VAR_PI] = M_PI; + var_values[VAR_PHI] = M_PHI; + var_values[VAR_E] = M_E; + var_values[VAR_IN_W] = var_values[VAR_IW] = inlink->w; + var_values[VAR_IN_H] = var_values[VAR_IH] = inlink->h; + var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN; + var_values[VAR_OUT_H] = var_values[VAR_OH] = NAN; + var_values[VAR_A] = (double) inlink->w / inlink->h; + var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ? 
+ (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1; + var_values[VAR_DAR] = var_values[VAR_A] * var_values[VAR_SAR]; + + /* evaluate width and height */ + av_expr_parse_and_eval(&res, (expr = s->w_expr), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx); + s->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res; + if ((ret = av_expr_parse_and_eval(&res, (expr = s->h_expr), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto fail; + s->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = res; + /* evaluate again the width, as it may depend on the output height */ + if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto fail; + s->w = res; + + w = s->w; + h = s->h; + + /* sanity check params */ + if (w < -1 || h < -1) { + av_log(ctx, AV_LOG_ERROR, "Size values less than -1 are not acceptable.\n"); + return AVERROR(EINVAL); + } + if (w == -1 && h == -1) + s->w = s->h = 0; + + if (!(w = s->w)) + w = inlink->w; + if (!(h = s->h)) + h = inlink->h; + if (w == -1) + w = av_rescale(h, inlink->w, inlink->h); + if (h == -1) + h = av_rescale(w, inlink->h, inlink->w); + + if (w > INT_MAX || h > INT_MAX || + (h * inlink->w) > INT_MAX || + (w * inlink->h) > INT_MAX) + av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); + + outlink->w = w; + outlink->h = h; + + ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h); + if (ret < 0) + return ret; + + av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n", + inlink->w, inlink->h, outlink->w, outlink->h); + + if (inlink->sample_aspect_ratio.num) + outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w, + outlink->w*inlink->h}, + inlink->sample_aspect_ratio); + else + outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + + return 0; + + fail: + av_log(NULL, AV_LOG_ERROR, + "Error when evaluating the expression '%s'\n", expr); + 
return ret; + } + + static int nppscale_deinterleave(AVFilterContext *ctx, NPPScaleStageContext *stage, + AVFrame *out, AVFrame *in) + { + AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data; + NppStatus err; + + switch (in_frames_ctx->sw_format) { + case AV_PIX_FMT_NV12: + err = nppiYCbCr420_8u_P2P3R(in->data[0], in->linesize[0], + in->data[1], in->linesize[1], + out->data, out->linesize, + (NppiSize){ in->width, in->height }); + break; + default: + return AVERROR_BUG; + } + if (err != NPP_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "NPP deinterleave error: %d\n", err); + return AVERROR_UNKNOWN; + } + + return 0; + } + + static int nppscale_resize(AVFilterContext *ctx, NPPScaleStageContext *stage, + AVFrame *out, AVFrame *in) + { + NPPScaleContext *s = ctx->priv; + NppStatus err; + int i; + + for (i = 0; i < FF_ARRAY_ELEMS(in->data) && in->data[i]; i++) { + int iw = stage->planes_in[i].width; + int ih = stage->planes_in[i].height; + int ow = stage->planes_out[i].width; + int oh = stage->planes_out[i].height; + + err = nppiResizeSqrPixel_8u_C1R(in->data[i], (NppiSize){ iw, ih }, + in->linesize[i], (NppiRect){ 0, 0, iw, ih }, + out->data[i], out->linesize[i], + (NppiRect){ 0, 0, ow, oh }, + (double)ow / iw, (double)oh / ih, + 0.0, 0.0, s->interp_algo); + if (err != NPP_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "NPP resize error: %d\n", err); + return AVERROR_UNKNOWN; + } + } + + return 0; + } + + static int nppscale_interleave(AVFilterContext *ctx, NPPScaleStageContext *stage, + AVFrame *out, AVFrame *in) + { + AVHWFramesContext *out_frames_ctx = (AVHWFramesContext*)out->hw_frames_ctx->data; + NppStatus err; + + switch (out_frames_ctx->sw_format) { + case AV_PIX_FMT_NV12: + err = nppiYCbCr420_8u_P3P2R((const uint8_t**)in->data, + in->linesize, + out->data[0], out->linesize[0], + out->data[1], out->linesize[1], + (NppiSize){ in->width, in->height }); + break; + default: + return AVERROR_BUG; + } + if (err != NPP_SUCCESS) { + av_log(ctx, 
AV_LOG_ERROR, "NPP deinterleave error: %d\n", err); + return AVERROR_UNKNOWN; + } + + return 0; + } + + static int (*const nppscale_process[])(AVFilterContext *ctx, NPPScaleStageContext *stage, + AVFrame *out, AVFrame *in) = { + [STAGE_DEINTERLEAVE] = nppscale_deinterleave, + [STAGE_RESIZE] = nppscale_resize, + [STAGE_INTERLEAVE] = nppscale_interleave, + }; + + static int nppscale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in) + { + NPPScaleContext *s = ctx->priv; + AVFrame *src = in; + int i, ret, last_stage = -1; + + for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) { + if (!s->stages[i].stage_needed) + continue; + + ret = nppscale_process[i](ctx, &s->stages[i], s->stages[i].frame, src); + if (ret < 0) + return ret; + + src = s->stages[i].frame; + last_stage = i; + } + + if (last_stage < 0) + return AVERROR_BUG; + ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0); + if (ret < 0) + return ret; + + av_frame_move_ref(out, src); + av_frame_move_ref(src, s->tmp_frame); + + ret = av_frame_copy_props(out, in); + if (ret < 0) + return ret; + + return 0; + } + + static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) + { + AVFilterContext *ctx = link->dst; + NPPScaleContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data; + AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; + + AVFrame *out = NULL; + CUresult err; + CUcontext dummy; + int ret = 0; + + if (s->passthrough) + return ff_filter_frame(outlink, in); + + out = av_frame_alloc(); + if (!out) { + ret = AVERROR(ENOMEM); + goto fail; + } + + av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den, + (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w, + (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h, + INT_MAX); + + err = cuCtxPushCurrent(device_hwctx->cuda_ctx); + if (err != CUDA_SUCCESS) { + ret = AVERROR_UNKNOWN; + goto fail; + } + + ret = 
nppscale_scale(ctx, out, in); + + cuCtxPopCurrent(&dummy); + if (ret < 0) + goto fail; + + av_frame_free(&in); + return ff_filter_frame(outlink, out); + fail: + av_frame_free(&in); + av_frame_free(&out); + return ret; + } + + #define OFFSET(x) offsetof(NPPScaleContext, x) + #define FLAGS AV_OPT_FLAG_VIDEO_PARAM + static const AVOption options[] = { + { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, + { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, + { "format", "Output pixel format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, + + { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = NPPI_INTER_CUBIC }, 0, INT_MAX, FLAGS, "interp_algo" }, + { "nn", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_NN }, 0, 0, FLAGS, "interp_algo" }, + { "linear", "linear", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_LINEAR }, 0, 0, FLAGS, "interp_algo" }, + { "cubic", "cubic", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC }, 0, 0, FLAGS, "interp_algo" }, + { "cubic2p_bspline", "2-parameter cubic (B=1, C=0)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_BSPLINE }, 0, 0, FLAGS, "interp_algo" }, + { "cubic2p_catmullrom", "2-parameter cubic (B=0, C=1/2)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_CATMULLROM }, 0, 0, FLAGS, "interp_algo" }, + { "cubic2p_b05c03", "2-parameter cubic (B=1/2, C=3/10)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_B05C03 }, 0, 0, FLAGS, "interp_algo" }, + { "super", "supersampling", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_SUPER }, 0, 0, FLAGS, "interp_algo" }, + { "lanczos", "Lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_LANCZOS }, 0, 0, FLAGS, "interp_algo" }, + { NULL }, + }; + + static const AVClass nppscale_class = { + .class_name = "nppscale", + .item_name = av_default_item_name, + .option = options, + .version = 
LIBAVUTIL_VERSION_INT, + }; + + static const AVFilterPad nppscale_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = nppscale_filter_frame, + }, + { NULL } + }; + + static const AVFilterPad nppscale_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = nppscale_config_props, + }, + { NULL } + }; + + AVFilter ff_vf_scale_npp = { + .name = "scale_npp", + .description = NULL_IF_CONFIG_SMALL("NVIDIA Performance Primitives video " + "scaling and format conversion"), + + .init = nppscale_init, + .uninit = nppscale_uninit, + .query_formats = nppscale_query_formats, + + .priv_size = sizeof(NPPScaleContext), + .priv_class = &nppscale_class, + + .inputs = nppscale_inputs, + .outputs = nppscale_outputs, + };