This post walks through the FLV demuxing part of WXInlinePlayer.

We start from codecDecode, the function called from the outside; see lib/codec/src/main.cpp:

void codecDecode(uint8_t *bytes, uint32_t length) {
  if (codec != nullptr) {             // codec is a global variable
    codec->decode(bytes, length);
  }
}

Next, lib/codec/src/codec/codec.cpp:

void Codec::decode(uint8_t *bytes, uint32_t byteLen) {
  shared_ptr<Buffer> buffer = make_shared<Buffer>(bytes, byteLen);
  _decoder->decode(buffer);
}

Then we move into lib/codec/src/demuxer/decoder.cpp:

void Decoder::decode(shared_ptr<Buffer> &buffer) {
  _buffer = make_shared<Buffer>(*_buffer + *buffer);      // underscore-prefixed names are members; append the new data
  for (;;) {
    switch (_state) {
      case Header::STATE: {                               // we haven't parsed the file header yet
        if (_buffer->get_length() < Header::MIN_LENGTH) { // not enough data yet
          return;
        }

        HeaderValue value = _header->decode(_buffer);     // parse the FLV file header
        if (value.empty) {
          return;
        }

        if (_factor != nullptr) {
          _factor->recvHeaderValue(value);                // tell the upper layer the header has arrived
        }
        _buffer = value.buffer;                           // _buffer becomes the data left over after the header
        _state = Body::STATE;                             // header done; from here on we expect body data
        break;
      }
      case Body::STATE: {                                 // once the header is parsed, we stay in body parsing
        if (_buffer->get_length() < Body::MIN_LENGTH) {
          return;
        }

        shared_ptr<BodyValue> value = _body->decode(_buffer); // parse the body's tags one by one
        if (_factor != nullptr) {
          // For media info, pass mediaInfo through; for audio, report an audio message carrying ADTS;
          // for video, decode first, then report a video message
          _factor->recvBodyValue(value);
        }
        _buffer = value->buffer;
      }                                                   // note: falls through to default and returns
      default:
        return;
    }
  }
}
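A hypothetical call sequence makes the streaming contract clearer: the caller may feed arbitrary chunks, and decode() buffers whatever is incomplete and resumes from the saved state on the next call. The driver below is a sketch of my own, not part of the codebase; only codecDecode comes from main.cpp above:

#include <cstdint>
#include <vector>

extern void codecDecode(uint8_t *bytes, uint32_t length); // exported from main.cpp above

// Feed an FLV stream in arbitrary slices; the chunking here is hypothetical.
void feedInChunks(std::vector<std::vector<uint8_t>> &chunks) {
  for (auto &chunk : chunks) {
    // Each call appends to the decoder's internal _buffer. On incomplete data
    // decode() returns early; the next call resumes from the same _state.
    codecDecode(chunk.data(), (uint32_t) chunk.size());
  }
}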

An FLV file consists of two parts, a header and a body; per the FLV spec, the layout is roughly like this:
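  +-------------------------------+
  | FLV header (9 bytes)          |  "FLV" signature, version, audio/video flags, DataOffset
  +-------------------------------+
  | PreviousTagSize0 (4 bytes)    |  always 0
  +-------------------------------+
  | Tag 1 (11-byte header + data) |  audio / video / script data
  +-------------------------------+
  | PreviousTagSize1 (4 bytes)    |  11 + dataSize of Tag 1
  +-------------------------------+
  | Tag 2, PreviousTagSize2, ...  |
  +-------------------------------+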

For the FLV file format, it helps to read along with the spec: see the FLV specification.

With the layout above in mind, the header-parsing code is simple enough to skip; we go straight to the body in lib/codec/src/demuxer/body.cpp:

shared_ptr<BodyValue> Body::decode(shared_ptr<Buffer> &buffer) {
  Tag tag;
  shared_ptr<BodyValue> value = make_shared<BodyValue>();

  for (;;) {
    if (buffer->get_length() < Body::MIN_LENGTH) {
      break;                                // not even enough bytes for previousTagSize; bail out
    }

    uint32_t size = buffer->read_uint32_be(0); // reads previousTagSize but never uses it... dead code
    shared_ptr<Buffer> body = make_shared<Buffer>(buffer->slice(4)); // skip previousTagSize and keep parsing
    if (body->get_length() < Tag::MIN_LENGTH) {
      break;                                // fewer bytes than the fixed tag header, i.e. 11
    }

    TagValue retValue = tag.decode(body);
    if (retValue.unvalidate) {
      break;
    }

    buffer = retValue.buffer;               // set to the remaining, still-unparsed tag data
    retValue.buffer = make_shared<Buffer>();
    value->tags->push_back(retValue);
  }

  value->buffer = buffer;           // set to the remaining, still-unparsed tag data
  return value;
}
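As the comment notes, previousTagSize is read and then discarded. Per the FLV spec it must equal 11 plus the dataSize of the preceding tag (and 0 for the very first one), so a stricter demuxer could use it as a sync check. A minimal sketch of that check, where lastDataSize is a hypothetical value carried over from the previously parsed tag, not something in this codebase:

#include <cstdint>

bool checkPreviousTagSize(uint32_t prevTagSize, uint32_t lastDataSize, bool firstTag) {
  if (firstTag) {
    return prevTagSize == 0;               // the first PreviousTagSize is always 0
  }
  return prevTagSize == 11 + lastDataSize; // 11-byte tag header + the previous payload
}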

Next, how a tag itself gets parsed:

TagValue Tag::decode(shared_ptr<Buffer> &buffer, uint32_t size) {
  _type = buffer->read_uint8(0);            // parse tagType
  _size = buffer->read_uint24_be(1);        // parse dataSize

  uint32_t ts0 = buffer->read_uint24_be(4); // timestamp, big-endian
  uint32_t ts1 = buffer->read_uint8(7);     // extended timestamp, the high 8 bits
  _timestamp = (ts1 << 24) | ts0;           // assemble the 32-bit timestamp in milliseconds; the first tag's is 0

  _streamId = buffer->read_uint24_be(8) >> 8;
  if (_streamId != 0) {                     // the stream ID must be 0
    return TagValue(true);
  }

  if (buffer->get_length() < Tag::MIN_LENGTH + _size) {
    return TagValue(true);                  // not enough data this round; try again when more arrives
  }

  buffer = make_shared<Buffer>(buffer->slice(Tag::MIN_LENGTH));

  TagValue value;
  switch (_type) {
    case AudioTag::TYPE: {                  // the tag carries audio data
      AudioTag tag;
      value.type = AudioTag::TYPE;
      value.audioTag = tag.decode(buffer, _size);
      value.audioTag.buffer = make_shared<Buffer>();
      break;
    }
    case VideoTag::TYPE: {                  // the tag carries video data
      VideoTag tag;
      value.type = VideoTag::TYPE;
      value.videoTag = tag.decode(buffer, _size);
      value.videoTag.buffer = make_shared<Buffer>();
      break;
    }
    case DataTag::TYPE: {                   // the tag carries script data
      DataTag tag;
      value.type = DataTag::TYPE;
      value.dataTag = tag.decode(buffer, _size);
      value.dataTag.buffer = make_shared<Buffer>();
      break;
    }
    default:
      return TagValue(true);
  }

  value.timestamp = _timestamp;
  value.buffer = make_shared<Buffer>(buffer->slice(_size));
  return value;
}
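The timestamp layout deserves a worked example: tag bytes 4..6 hold the lower 24 bits big-endian, and byte 7 is the extension holding the upper 8 bits, so streams longer than 0xFFFFFF ms keep counting. The bytes below are fabricated for illustration only:

uint8_t hdr[8] = {0x09, 0x00, 0x00, 0x2A, 0x00, 0x01, 0x23, 0x01}; // made-up tag header bytes
uint32_t ts0 = (hdr[4] << 16) | (hdr[5] << 8) | hdr[6]; // 0x000123, lower 24 bits
uint32_t ts1 = hdr[7];                                  // 0x01, the extension byte
uint32_t timestamp = (ts1 << 24) | ts0;                 // 0x01000123 = 16777507 ms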

First, audio tag parsing in lib/codec/src/demuxer/audio_tag.cpp:

AudioTagValue AudioTag::decode(shared_ptr<Buffer> &buffer, uint32_t size) {
  _soundFormat = (uint32_t) ((buffer->read_uint8(0) & 240) >> 4); // the high 4 bits are the audio format
  if (_soundFormat != 10) {                                       // only AAC is supported here
    return AudioTagValue(false);
  }

  _soundRate = (uint32_t) ((buffer->read_uint8(0) & 12) >> 2);    // sample rate; for AAC this is always 3, meaning 44 kHz
  _soundSize = (uint32_t) ((buffer->read_uint8(0) & 2) >> 1);     // sample size: 0 = 8-bit, 1 = 16-bit
  _soundType = (uint32_t) (buffer->read_uint8(0) & 1);            // 0 = mono, 1 = stereo; always 1 for AAC
  _AACPacketType = buffer->read_uint8(1);                         // 0 = AAC sequence header, 1 = raw AAC data

  AudioTagValue value;
  value.soundFormat = _soundFormat;
  value.soundRate = _soundRate;
  value.soundSize = _soundSize;
  value.soundType = _soundType;
  value.AACPacketType = _AACPacketType;
  value.data = make_shared<Buffer>(buffer->slice(2, size));
  value.buffer = make_shared<Buffer>(buffer->slice(size));
  return value;
}
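For a typical AAC stream the first byte of every audio tag is 0xAF, which decodes as follows (a quick worked example, not taken from the codebase):

uint8_t b = 0xAF;                        // 1010 1111
uint32_t soundFormat = (b & 0xF0) >> 4;  // 10 = AAC
uint32_t soundRate   = (b & 0x0C) >> 2;  // 3  = 44 kHz
uint32_t soundSize   = (b & 0x02) >> 1;  // 1  = 16-bit samples
uint32_t soundType   =  b & 0x01;        // 1  = stereo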

Then, in lib/codec/src/factor/codec_factor.cpp, let's see how the audio data is handed back:

void CodecFactor::_handleAudioTag(AudioTagValue &tag, uint32_t timestamp) const {
  if (tag.AACPacketType == 0) {                   // an AAC sequence header
    shared_ptr<Buffer> audioSpecificConfig = tag.data;
    // 5 bits: 2 = AAC-LC, 5 = SBR, 29 = PS
    int audioObjectType = ((*audioSpecificConfig)[0] & 0xf8) >> 3;
    // 4 bits: an index into the sampling-rate table
    int samplingFrequencyIndex = (((*audioSpecificConfig)[0] & 0x7) << 1) | ((*audioSpecificConfig)[1] >> 7);
    // 4 bits: the number of audio channels
    int channelConfig = ((*audioSpecificConfig)[1] >> 3) & 0x0f;
    // GASpecificConfig, unused
    int frameLengthFlag = ((*audioSpecificConfig)[1] >> 2) & 0x01;
    // GASpecificConfig, unused
    int dependsOnCoreCoder = ((*audioSpecificConfig)[1] >> 1) & 0x01;
    // GASpecificConfig, unused
    int extensionFlag = (*audioSpecificConfig)[1] & 0x01;

    // assemble the ADTS (Audio Data Transport Stream) header
    uint8_t adts[7] = {
            0xff,
            0xf0 | (0 << 3) | (0 << 1) | 1,
            (uint8_t) (((audioObjectType - 1) << 6) | ((samplingFrequencyIndex & 0x0f) << 2) | (0 << 1) |
                       ((channelConfig & 0x04) >> 2)),
            (uint8_t) (((channelConfig & 0x03) << 6) | (0 << 5) | (0 << 4) | (0 << 3) | (0 << 2) |
                       ((7 & 0x1800) >> 11)),
            (uint8_t) ((7 & 0x7f8) >> 3),
            (uint8_t) (((7 & 0x7) << 5) | 0x1f),
            0xfc,
    };
    _codec->adtsHeader = make_shared<Buffer>(adts, 7);  // stash it in a member variable
  } else if (tag.AACPacketType == 1) {          // raw AAC audio data
    // copy the ADTS header out of the member variable, then write the packet length into it
    shared_ptr<Buffer> adtsHeader = make_shared<Buffer>();
    adtsHeader = make_shared<Buffer>(*adtsHeader + *_codec->adtsHeader);
    shared_ptr<Buffer> adtsBody = tag.data;
    uint32_t adtsLen = adtsBody->get_length() + 7;
    adtsHeader->write_uint8(adtsHeader->read_uint8(3) | ((adtsLen & 0x1800) >> 11), 3);
    adtsHeader->write_uint8((adtsLen & 0x7f8) >> 3, 4);
    adtsHeader->write_uint8((((adtsLen & 0x7) << 5) | 0x1f), 5);
    adtsHeader->write_uint8(0xfc, 6);
    adtsBody = make_shared<Buffer>(*adtsHeader + *adtsBody);
#ifdef __EMSCRIPTEN__
    // report the payload size so the upper layer can prepare a buffer to receive it
    EM_ASM({
      var isWorker = typeof importScripts == "function";
      var bridge = (isWorker ? self : window)[UTF8ToString($0)];
      if(bridge && typeof bridge["onAudioDataSize"] == "function"){
        bridge["onAudioDataSize"]({
          "size": $1,
        });
      }
    }, _codec->bridgeName.c_str(), adtsBody->get_length());

    // hand the data back; the buffer is allocated once up front so we don't malloc per call
    if(_codec->audioBuffer != nullptr){
      memcpy(_codec->audioBuffer, adtsBody->get_buf_ptr(), adtsBody->get_length());
      EM_ASM({
        var isWorker = typeof importScripts == "function";
        var bridge = (isWorker ? self : window)[UTF8ToString($0)];
        if(bridge && typeof bridge["onAudioData"] == "function"){
          bridge["onAudioData"]({
            "timestamp": $1,
          });
        }
      }, _codec->bridgeName.c_str(), timestamp);
    }
#endif
  }
}
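The header-patching above is easier to follow as a single standalone function. The sketch below builds the same 7-byte ADTS header in one go (MPEG-4, layer 0, no CRC, buffer fullness fixed at 0x7FF, matching the constants in the code above); it is an illustration of mine, not the project's API:

#include <cstdint>

// frameLength must include these 7 header bytes.
static void buildAdtsHeader(uint8_t out[7], int audioObjectType,
                            int samplingFrequencyIndex, int channelConfig,
                            uint32_t frameLength) {
  out[0] = 0xFF;                                        // syncword, high 8 bits
  out[1] = 0xF1;                                        // syncword low 4 bits | MPEG-4 | layer 0 | no CRC
  out[2] = (uint8_t) (((audioObjectType - 1) << 6)      // profile = audioObjectType - 1
                    | ((samplingFrequencyIndex & 0x0F) << 2)
                    | ((channelConfig & 0x04) >> 2));
  out[3] = (uint8_t) (((channelConfig & 0x03) << 6)
                    | ((frameLength >> 11) & 0x03));    // frame length, high 2 bits
  out[4] = (uint8_t) ((frameLength >> 3) & 0xFF);       // frame length, middle 8 bits
  out[5] = (uint8_t) (((frameLength & 0x07) << 5) | 0x1F); // low 3 bits | buffer fullness high bits
  out[6] = 0xFC;                                        // buffer fullness low bits (0x7FF in total)
}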

The parse-then-report flow is easiest to see on the audio path. Now for the next key piece, video handling: lib/codec/src/demuxer/video_tag.cpp

VideoTagValue VideoTag::decode(const shared_ptr<Buffer> &buffer, uint32_t size) {
  // 4 bits, frame type (1 = keyframe, 2 = inter frame)
  _frameType = (uint32_t) ((buffer->read_uint8(0) & 240) >> 4);
  // 4 bits, codec ID (7 = AVC, 12 = HEVC)
  _codecId = (uint32_t) (buffer->read_uint8(0) & 15);
  if (_codecId != 7 && _codecId != 12) {
    return VideoTagValue(false);
  }

  // for H.264/H.265 this field is the packet type (0 = sequence header, 1 = NALUs, 2 = end of sequence)
  _AVCPacketType = buffer->read_uint8(1);
  _compositionTime = (uint32_t) (buffer->read_int32_be(2) >> 8);

  VideoTagValue value;
  value.frameType = _frameType;
  value.codecId = _codecId;
  value.AVCPacketType = _AVCPacketType;
  value.compositionTime = _compositionTime;
  value.data = make_shared<Buffer>(buffer->slice(5, size));
  value.buffer = make_shared<Buffer>(buffer->slice(size));
  return value;
}
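Another quick worked example: the first byte of a video tag packs both fields, so 0x17 means an AVC keyframe and 0x27 an AVC inter frame:

uint8_t b = 0x17;                       // 0001 0111
uint32_t frameType = (b & 0xF0) >> 4;   // 1 = keyframe
uint32_t codecId   =  b & 0x0F;         // 7 = AVC (H.264)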

Back in lib/codec/src/factor/codec_factor.cpp, here is how video is handled and how frames are decoded and reported:

void CodecFactor::_handleVideoTag(VideoTagValue &tag, uint32_t timestamp) {
  uint32_t width = 0;
  uint32_t height = 0;
  uint32_t stride0 = 0;
  uint32_t stride1 = 0;
  uint8_t *picPtr = nullptr;

#ifdef USE_OPEN_H265
  // nothing to do
#elif defined(USE_OPEN_H264)
  unsigned char *pDst[3] = {0};
  SBufferInfo sDstInfo = {0};
#endif

  // ################### start of stream: SPS and PPS must be supplied ###################
  // the payload is an AVCDecoderConfigurationRecord, i.e. the AVC sequence header
  if (tag.AVCPacketType == 0) {
    shared_ptr<Buffer> unit = tag.data;
    // version number
    uint8_t configurationVersion = unit->read_uint8(0);
    // SPS[1]
    uint8_t AVCProfileIndication = unit->read_uint8(1);
    // SPS[2]
    uint8_t profileCompatibility = unit->read_uint8(2);
    // SPS[3]
    uint8_t AVCLevelIndication = unit->read_uint8(3);
    // how many bytes a NALU length prefix occupies; the name is misleading: after the +1 it is really lengthSize
    _codec->lengthSizeMinusOne = (unit->read_uint8(4) & 3) + 1;

    // number of SPS entries
    int numOfSequenceParameterSets = unit->read_uint8(5) & 0x1f;
    // 16 bits, the SPS length; there is only one SPS here, so only one is read
    int sequenceParameterSetLength = unit->read_uint16_be(6);
    // read the SPS into a member variable; _mask is {0x00, 0x00, 0x00, 0x01}
    _codec->sps = make_shared<Buffer>(unit->slice(8, 8 + (uint32_t) sequenceParameterSetLength));
    _codec->sps = make_shared<Buffer>(*_mask + *_codec->sps);

    // let the decoder process the SPS
#ifdef USE_OPEN_H265
    // nothing to do
#elif defined(USE_OPEN_H264)
    _codec->storage->DecodeFrame2(_codec->sps->get_buf_ptr(), _codec->sps->get_length(), &pDst[0], &sDstInfo);
#else
    h264bsdDecode(_codec->storage, _codec->sps->get_buf_ptr(), _codec->sps->get_length(), &picPtr, &width, &height);
#endif

    // number of PPS entries
    int numOfPictureParameterSets = unit->read_uint8(8 + (uint32_t) sequenceParameterSetLength);
    // 16 bits; there is only one PPS here, so only one is read
    int pictureParameterSetLength = unit->read_uint16_be(8 + (uint32_t) sequenceParameterSetLength + 1);
    // read the PPS into a member variable
    _codec->pps = make_shared<Buffer>(unit->slice(
            8 + (uint32_t) sequenceParameterSetLength + 3,
            8 + (uint32_t) sequenceParameterSetLength + 3 + pictureParameterSetLength
    ));
    // again, _mask is {0x00, 0x00, 0x00, 0x01}
    _codec->pps = make_shared<Buffer>(*_mask + *_codec->pps);

  // let the decoder process the PPS
#ifdef USE_OPEN_H265
    // nothing to do
#elif defined(USE_OPEN_H264)
    _codec->storage->DecodeFrame2(_codec->pps->get_buf_ptr(), _codec->pps->get_length(), &pDst[0], &sDstInfo);
#else
    h264bsdDecode(_codec->storage, _codec->pps->get_buf_ptr(), _codec->pps->get_length(), &picPtr, &width, &height);
#endif

  // ################### middle of stream: NALUs ###################
  } else if (tag.AVCPacketType == 1) {
    uint32_t size = tag.data->get_length();
    shared_ptr<Buffer> unit = tag.data;
    shared_ptr<Buffer> nalus = make_shared<Buffer>();
    while (size > 0) {
      int naluLen = 0;
      // compute this NALU's length from its big-endian prefix
      for (uint32_t i = 0; i < _codec->lengthSizeMinusOne; i++) {
        naluLen |= unit->read_uint8(i) << ((_codec->lengthSizeMinusOne - 1 - i) * 8);
      }
      shared_ptr<Buffer> nalu = make_shared<Buffer>(unit->slice(
              (uint32_t) _codec->lengthSizeMinusOne,
              (uint32_t) _codec->lengthSizeMinusOne + naluLen
      ));
      // concatenate all NALUs into one buffer; _mask is {0x00, 0x00, 0x00, 0x01}
      nalus = make_shared<Buffer>(*nalus + *_mask + *nalu);
      unit = make_shared<Buffer>(unit->slice((uint32_t) _codec->lengthSizeMinusOne + naluLen));
      size -= _codec->lengthSizeMinusOne + naluLen;
    }

    // decode the video frame
#ifdef USE_OPEN_H265
    if(true) {
#elif defined(USE_OPEN_H264)
    uint32_t retCode = _codec->storage->DecodeFrame2(nalus->get_buf_ptr(), nalus->get_length(), &pDst[0], &sDstInfo);
    if (retCode == 0 && sDstInfo.iBufferStatus == 1) {
      width = (uint32_t) sDstInfo.UsrData.sSystemBuffer.iWidth;
      height = (uint32_t) sDstInfo.UsrData.sSystemBuffer.iHeight;
      stride0 = (uint32_t) sDstInfo.UsrData.sSystemBuffer.iStride[0];
      stride1 = (uint32_t) sDstInfo.UsrData.sSystemBuffer.iStride[1];
#else
      uint32_t retCode = h264bsdDecode(_codec->storage, nalus->get_buf_ptr(), nalus->get_length(), &picPtr, &width, &height);
      if (retCode == H264BSD_PIC_RDY) {
        stride0 = width;
        stride1 = height;
#endif

      uint32_t totalSize = (width * height) * 3 / 2;

#ifdef __EMSCRIPTEN__
      // report this frame's decoded size so the upper layer can prepare a buffer
      EM_ASM({
        var isWorker = typeof importScripts == "function";
        var bridge = (isWorker ? self : window)[UTF8ToString($0)];
        if(bridge && typeof bridge["onVideoDataSize"] == "function"){
          bridge["onVideoDataSize"]({
            "size": $1,
          });
        }
      }, _codec->bridgeName.c_str(), totalSize);

      // the JS side allocates the buffer once, so there is no allocation per frame
      if(_codec->videoBuffer != nullptr){

#ifdef USE_OPEN_H265
  // nothing to do
#elif defined(USE_OPEN_H264)
      uint32_t startIndex = 0;
      uint8_t *ptr = pDst[0];
      uint32_t iWidth = width;
      uint32_t iHeight = height;
      for (uint32_t i = 0; i < iHeight; i++) {
        memcpy(_codec->videoBuffer + startIndex, ptr, iWidth);
        ptr += stride0;
        startIndex += iWidth;
      }

      ptr = pDst[1];
      iWidth = width / 2;
      iHeight = height / 2;
      for (uint32_t i = 0; i < iHeight; i++) {
        memcpy(_codec->videoBuffer + startIndex, ptr, iWidth);
        ptr += stride1;
        startIndex += iWidth;
      }

      ptr = pDst[2];
      iWidth = width / 2;
      iHeight = height / 2;
      for (uint32_t i = 0; i < iHeight; i++) {
        memcpy(_codec->videoBuffer + startIndex, ptr, iWidth);
        ptr += stride1;
        startIndex += iWidth;
      }

      stride0 = width;
      stride1 = height;
#else
      memcpy(_codec->videoBuffer, picPtr, totalSize);
#endif

        // decoding done; report the video frame with the timestamp from the tag
        EM_ASM({
          var isWorker = typeof importScripts == "function";
          var bridge = (isWorker ? self : window)[UTF8ToString($0)];
          if(bridge && typeof bridge["onVideoData"] == "function"){
            bridge["onVideoData"]({
              "timestamp": $1,
              "width": $2,
              "height": $3,
              "stride0": $4,
              "stride1": $5
            });
          }
        }, _codec->bridgeName.c_str(), timestamp, width, height, stride0, stride1);
      }
#endif
    }
  // ################### end of stream: the video stream is over ###################
  } else if (tag.AVCPacketType == 2) {
#ifdef __EMSCRIPTEN__
    EM_ASM({
      var isWorker = typeof importScripts == "function";
      var bridge = (isWorker ? self : window)[UTF8ToString($0)];
      if(bridge && typeof bridge["onComplete"] == "function"){
        bridge["onComplete"]();
      }
    }, _codec->bridgeName.c_str());
#endif
  }
}
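The length-prefix handling in the AVCPacketType == 1 branch is the classic AVCC-to-Annex-B conversion: each NALU arrives with a big-endian length prefix and is rewritten behind a 0x00000001 start code before the decoder sees it. Pulled out of the class for clarity, a self-contained sketch (my own, not the project's API) looks like this:

#include <cstdint>
#include <vector>

// Convert length-prefixed NALUs (AVCC) to start-code-delimited ones (Annex B).
// lengthSize corresponds to what the code above stores in _codec->lengthSizeMinusOne (1..4).
std::vector<uint8_t> avccToAnnexB(const uint8_t *data, size_t size, int lengthSize) {
  static const uint8_t startCode[4] = {0x00, 0x00, 0x00, 0x01};
  std::vector<uint8_t> out;
  size_t pos = 0;
  while (pos + lengthSize <= size) {
    uint32_t naluLen = 0;
    for (int i = 0; i < lengthSize; i++) {   // big-endian length prefix
      naluLen = (naluLen << 8) | data[pos + i];
    }
    pos += lengthSize;
    if (pos + naluLen > size) break;         // truncated input: stop rather than overrun
    out.insert(out.end(), startCode, startCode + 4);
    out.insert(out.end(), data + pos, data + pos + naluLen);
    pos += naluLen;
  }
  return out;
}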

As we can see, then, video frames are decoded right after demuxing, inside the factor callback, before each frame is reported upward.