【发布时间】:2018-04-09 18:13:29
【问题描述】:
我正在使用 IMFSourceReader 从磁盘连续缓冲 1 秒的音频文件部分。我无法准确地寻找 M4A 音频数据(AAC 编码),这会导致音频流不连续。
我知道 IMFSourceReader.Read() 返回的数据通常相对于 IMFSourceReader.SetCurrentPosition() 中设置的位置偏移几百帧。但是,即使考虑到这个偏移量,我也无法创建连续的无故障流(参见 readCall == 0 条件)。
我能够准确地寻找部分 WAV 文件(未压缩),因此我的偏移量计算似乎是正确的。
我的问题是媒体基础库是否能够准确地查找/读取 AAC 编码的 M4A 文件(或任何压缩音频)的部分?
这是代码。 inStartFrame 是我要阅读的示例框架。输出格式配置为 32 位浮点数据(见最终函数)。为了稍微修剪一下,我删除了一些错误检查和清理,例如文件结尾。
bool WindowsM4AReader::read(float** outBuffer, int inNumChannels, int64_t inStartFrame, int64_t inNumFramesToRead)
{
int64_t hnsToRequest = SampleFrameToHNS(inStartFrame);
int64_t frameRequested = HNSToSampleFrame(hnsToRequest);
PROPVARIANT positionProp;
positionProp.vt = VT_I8;
positionProp.hVal.QuadPart = hnsToRequest;
HRESULT hr = mReader->SetCurrentPosition(GUID_NULL, positionProp);
mReader->Flush(0);
IMFSample* pSample = nullptr;
int bytesPerFrame = sizeof(float) * mNumChannels;
int64_t totalFramesWritten = 0;
int64_t remainingFrames = inNumFramesToRead;
int readCall = 0;
bool quit = false;
while (!quit) {
DWORD streamIndex = 0;
DWORD flags = 0;
LONGLONG llTimeStamp = 0;
hr = mReader->ReadSample(
MF_SOURCE_READER_FIRST_AUDIO_STREAM, // Stream index.
0, // Flags.
&streamIndex, // Receives the actual stream index.
&flags, // Receives status flags.
&llTimeStamp, // Receives the time stamp.
&pSample // Receives the sample or NULL.
);
int64_t frameOffset = 0;
if (readCall == 0) {
int64_t hnsOffset = hnsToRequest - llTimeStamp;
frameOffset = HNSToSampleFrame(hnsOffset);
}
++readCall;
if (pSample) {
IMFMediaBuffer* decodedBuffer = nullptr;
pSample->ConvertToContiguousBuffer(&decodedBuffer);
BYTE* rawBuffer = nullptr;
DWORD maxLength = 0;
DWORD bufferLengthInBytes = 0;
decodedBuffer->Lock(&rawBuffer, &maxLength, &bufferLengthInBytes);
int64_t availableFrames = bufferLengthInBytes / bytesPerFrame;
availableFrames -= frameOffset;
int64_t framesToCopy = min(availableFrames, remainingFrames);
// copy to outputBuffer
float* floatBuffer = (float*)rawBuffer;
float* offsetBuffer = &floatBuffer[frameOffset * mNumChannels];
for (int channel = 0; channel < mNumChannels; ++channel) {
for (int64_t frame = 0; frame < framesToCopy; ++frame) {
float sampleValue = offsetBuffer[frame * mNumChannels + channel];
outBuffer[channel][totalFramesWritten + frame] = sampleValue;
}
}
decodedBuffer->Unlock();
totalFramesWritten += framesToCopy;
remainingFrames -= framesToCopy;
if (totalFramesWritten >= inNumFramesToRead)
quit = true;
}
}
}
LONGLONG WindowsM4AReader::SampleFrameToHNS(int64_t inFrame)
{
return inFrame * (10000000.0 / mSampleRate);
}
int64_t WindowsM4AReader::HNSToSampleFrame(LONGLONG inHNS)
{
return inHNS / 10000000.0 * mSampleRate;
}
bool WindowsM4AReader::ConfigureAsFloatDecoder()
{
IMFMediaType* outputType = nullptr;
HRESULT hr = MFCreateMediaType(&outputType);
UINT32 bitsPerSample = sizeof(float) * 8;
UINT32 blockAlign = mNumChannels * (bitsPerSample / 8);
UINT32 bytesPerSecond = blockAlign * (UINT32)mSampleRate;
hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
hr = outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float);
hr = outputType->SetUINT32(MF_MT_AUDIO_PREFER_WAVEFORMATEX, TRUE);
hr = outputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, (UINT32)mNumChannels);
hr = outputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, (UINT32)mSampleRate);
hr = outputType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, blockAlign);
hr = outputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, bytesPerSecond);
hr = outputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitsPerSample);
hr = outputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE);
DWORD streamIndex = 0;
hr = mReader->SetCurrentMediaType(streamIndex, NULL, outputType);
return true;
}
【问题讨论】:
标签: c++ audio ms-media-foundation