很多时候为了方便收听视频文件中的音频信息,我们会将视频文件中的音频流转码输出成音频文件,方便在对应的平台上进行播放。这里就介绍一下如何通过FFmpeg将视频文件中的音频流转码成特定编码格式的音频文件。

转码过程中我们先对视频文件进行解封装得到音频流,然后通过对应的音频解码器对音频流进行解码得到原始的音频帧。得到音频帧之后,我们就可以通过FFmpeg提供的各种滤镜对音频参数进行调整了,调整的参数包括:采样格式、采样率、通道布局、比特率(码率)。最后再将调整后的音频帧送入音频编码器重新编码,并封装写入目标音频文件。

下面介绍一下各个参数:
1.采样格式
采样格式就是每个音频数据点的数据格式,支持下面的格式

/**
 * Audio sample formats, listed in declaration order.
 *
 * Formats whose name ends in "P" are planar; the others are the non-planar
 * counterparts. Enumerator values start at -1 (AV_SAMPLE_FMT_NONE) and
 * increase by one per entry.
 */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double

    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar
    AV_SAMPLE_FMT_S64,         ///< signed 64 bits
    AV_SAMPLE_FMT_S64P,        ///< signed 64 bits, planar

    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};

2.采样率
采样率即采样的频率,指的是每秒钟对模拟信号进行采样的次数。采样率越高,离散的数据和连续的模拟信号的拟合就越接近,声音的质量也就越高,占用的存储空间也就越大。通常情况下,常见的采样率有 22.05kHz、44.1kHz、48kHz 等。

3.通道布局
channels 为音频的通道数,如 1、2、3、4、5……
channel_layout 为音频的通道布局,描述各个通道的排布方式,如单声道(mono)、立体声(stereo,即双通道)、5.1 环绕声等等

4.码率
码率(比特率)指的是数据传输时单位时间传送的数据位数,一般我们用的单位是kbps即千位每秒。通俗一点的理解就是每秒钟的音频所占用的数据量(注意它不同于采样率):在同一种编码格式下,码率越大,压缩带来的损失越小,精度就越高,处理出来的文件就越接近原始文件,文件体积也越大。

使用FFmpeg提取音频流进行转码并动态调整音频参数的示例如下所示:

extern "C" {#include #include #include #include #include }#include #include "audio_filter.h"//@1输出文件名称 @2输入文件名称 @3采样格式 @4采样率 @5通道布局  @6码率int extract_audio(const char *output_filename, const char *input_filename, AVSampleFormat sample_fmt,int sample_rate, uint64_t channel_layout, uint64_t bitrate) {AVFormatContext *inFmtCtx = nullptr;AVFormatContext *outFmtCtx = nullptr;AVCodecContext *aDecCtx = nullptr;AVCodecContext *aEncCtx = nullptr;AVStream *aOutStream = nullptr;int ret;//打开文件获取流信息ret = avformat_open_input(&inFmtCtx, input_filename, nullptr, nullptr);avformat_find_stream_info(inFmtCtx, nullptr);    avformat_alloc_output_context2(&outFmtCtx, nullptr, nullptr, output_filename);for (int i = 0; i < inFmtCtx->nb_streams; ++i) {AVStream *inStream = inFmtCtx->streams[i];if (inStream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO){//打开解码器AVCodec *decoder = avcodec_find_decoder(inStream->codecpar->codec_id);aDecCtx = avcodec_alloc_context3(decoder);ret = avcodec_parameters_to_context(aDecCtx, inStream->codecpar);ret = avcodec_open2(aDecCtx, decoder, nullptr);//创建音频编码器AVCodec *encoder = avcodec_find_encoder(outFmtCtx->oformat->audio_codec);aOutStream = avformat_new_stream(outFmtCtx, encoder);aOutStream->id = outFmtCtx->nb_streams - 1;aEncCtx = avcodec_alloc_context3(encoder);//指定编码器的参数aEncCtx->codec_id = encoder->id; //编码器IDaEncCtx->sample_fmt = sample_fmt ? sample_fmt : aDecCtx->sample_fmt; //采样格式aEncCtx->sample_rate = sample_rate ? sample_rate : aDecCtx->sample_rate;//采样率aEncCtx->channel_layout = channel_layout;//通道布局aEncCtx->channels = av_get_channel_layout_nb_channels(channel_layout);//通道数aEncCtx->bit_rate = bitrate ? 
bitrate : aDecCtx->bit_rate; //码率aEncCtx->time_base = { 1, aEncCtx->sample_rate }; //时间基aOutStream->time_base = aEncCtx->time_base;if (outFmtCtx->oformat->flags & AVFMT_GLOBALHEADER)aEncCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;ret = avcodec_open2(aEncCtx, encoder, nullptr);ret = avcodec_parameters_from_context(aOutStream->codecpar, aEncCtx);av_dict_copy(&aOutStream->metadata, inStream->metadata, 0);break;}}if (!(outFmtCtx->oformat->flags & AVFMT_NOFILE)) {ret = avio_open(&outFmtCtx->pb, output_filename, AVIO_FLAG_WRITE);if (ret < 0) {return -1;}}//写文件头ret = avformat_write_header(outFmtCtx, nullptr);if (ret < 0) {return -1;}AVFrame *inAudioFrame = av_frame_alloc();AVFrame *outAudioFrame = av_frame_alloc();outAudioFrame->format = aEncCtx->sample_fmt;outAudioFrame->sample_rate = aEncCtx->sample_rate;outAudioFrame->channel_layout = aEncCtx->channel_layout;outAudioFrame->nb_samples = aEncCtx->frame_size;ret = av_frame_get_buffer(outAudioFrame, 0);int64_t audio_pts = 0;//通过滤镜对音频帧进行处理操作AudioFilter filter;char description[512];AudioConfig inConfig(aDecCtx->sample_fmt, aDecCtx->sample_rate, aDecCtx->channel_layout, aDecCtx->time_base);AudioConfig outConfig(aEncCtx->sample_fmt, aEncCtx->sample_rate, aEncCtx->channel_layout, aEncCtx->time_base);char ch_layout[64];av_get_channel_layout_string(ch_layout, sizeof(ch_layout),av_get_channel_layout_nb_channels(aEncCtx->channel_layout), aEncCtx->channel_layout);snprintf(description, sizeof(description),"[in]aresample=sample_rate=%d[res];[res]aformat=sample_fmts=%s:sample_rates=%d:channel_layouts=%s[out]",aEncCtx->sample_rate,av_get_sample_fmt_name(aEncCtx->sample_fmt),aEncCtx->sample_rate,ch_layout);filter.create(description, &inConfig, &outConfig);while (true) {AVPacket inPacket{ nullptr };av_init_packet(&inPacket);ret = av_read_frame(inFmtCtx, &inPacket);if (ret == AVERROR_EOF) {break;}else if (ret < 0) {return -1;}        //调整完音频参数之后对音频帧进行编码if (inPacket.stream_index == AVMEDIA_TYPE_AUDIO) {avcodec_send_packet(aDecCtx, 
&inPacket);avcodec_receive_frame(aDecCtx, inAudioFrame);if (ret == 0) {ret = filter.addInput1(inAudioFrame);av_frame_unref(inAudioFrame);do {outAudioFrame->nb_samples = aEncCtx->frame_size;ret = filter.getFrame(outAudioFrame);if (ret == 0) {outAudioFrame->pts = audio_pts;audio_pts += outAudioFrame->nb_samples;ret = avcodec_send_frame(aEncCtx, outAudioFrame);}else {break;}do {AVPacket outPacket{ nullptr };av_init_packet(&outPacket);ret = avcodec_receive_packet(aEncCtx, &outPacket);if (ret == 0) {av_packet_rescale_ts(&outPacket, aEncCtx->time_base, aOutStream->time_base);outPacket.stream_index = aOutStream->index;ret = av_interleaved_write_frame(outFmtCtx, &outPacket);if (ret < 0) {break;}}else {break;}} while (true);} while (true);}}}//最后刷新音频数据int eof = 0;do {ret = filter.getFrame(outAudioFrame);if (ret == 0) {outAudioFrame->pts = audio_pts;audio_pts += outAudioFrame->nb_samples;}ret = avcodec_send_frame(aEncCtx, ret == 0 ? outAudioFrame : nullptr);do {AVPacket outPacket{ nullptr };ret = avcodec_receive_packet(aEncCtx, &outPacket);if (ret == 0) {av_packet_rescale_ts(&outPacket, aEncCtx->time_base, aOutStream->time_base);outPacket.stream_index = aOutStream->index;ret = av_interleaved_write_frame(outFmtCtx, &outPacket);if (ret < 0) {eof = 1;break;}}else if (ret == AVERROR_EOF) {eof = 1;break;}else {break;}} while (true);} while (!eof);filter.destroy();//清理编码器和解码器av_write_trailer(outFmtCtx);avformat_close_input(&inFmtCtx);av_frame_free(&inAudioFrame);av_frame_free(&outAudioFrame);avcodec_free_context(&aDecCtx);avcodec_free_context(&aEncCtx);avformat_free_context(inFmtCtx);avformat_free_context(outFmtCtx);return 0;}int main(int argc, char* argv[]){std::string input_file_path = std::string(argv[1]);std::string output_file_path = std::string(argv[2]);extract_audio(output_file_path.c_str(), input_file_path.c_str(), (AVSampleFormat)0, 48000, AV_CH_LAYOUT_STEREO, 0);}

这里用到的音频滤镜封装类 AudioFilter(即代码中包含的 audio_filter.h),在另一篇文章里面有,这里就不重复列举了,可以参考另一篇文章:
FFmpeg进阶: 给视频添加背景音乐