Search code examples
c++ hex ascii webm mkv

How is the cluster size encoded in an MKV file?


Each element begins with its Element ID (also called an EBML ID), followed by the Data Size.

The data size, in octets, is also coded with a UTF-8-like system.

How can I decode the data size into a decimal value?


Solution

  • You can find the simple sample code below in https://github.com/wangf1978/DumpTS/blob/master/Matroska.h:

    /**
     * @brief Decode one EBML-style variable-size integer from the bitstream.
     *
     * The first byte is fetched with bs.GetByte(). The number of zero bits that
     * precede its first set bit (scanning from the most significant bit) gives
     * the number of additional bytes to read; each of those is fetched with
     * bs.GetBits(8) and appended big-endian to the result.
     *
     * @param bs         Bitstream to read from; 1 + (leading zero count) bytes are consumed on success.
     * @param max_octs   Maximum total length, in octets, that is accepted. Only the 8 bits
     *                   of the first byte can carry the length marker, so values above 8
     *                   behave like 8.
     * @param unPackVal  When true the length-marker bit is cleared from the result (use for
     *                   data sizes); when false the marker is kept (use for element IDs).
     * @param pcbValLen  Optional; on success receives the total number of octets consumed.
     *                   It is left untouched when the function fails.
     * @return The decoded value, or UINT64_MAX when no marker bit is found within the
     *         first max_octs bits of the first byte (only the first byte has been
     *         consumed in that case).
     */
    static uint64_t UnpackUnsignedIntVal(CBitstream&bs, uint8_t max_octs = 8, bool unPackVal=true, uint8_t* pcbValLen=nullptr)
    {
        // The marker can only sit in one of the 8 bits of the first byte. Clamping here
        // keeps the shift count below non-negative even if a caller passes max_octs > 8
        // (a negative shift count is undefined behaviour).
        const uint8_t nMaxMarkerBits = max_octs < 8 ? max_octs : 8;

        uint64_t u64Val = bs.GetByte();

        // Count the zero bits in front of the marker, starting at the MSB.
        uint8_t nLeadingZeros = 0;
        while (nLeadingZeros < nMaxMarkerBits && (u64Val & (1ULL << (7 - nLeadingZeros))) == 0)
            nLeadingZeros++;

        if (nLeadingZeros >= nMaxMarkerBits)  // No marker within the allowed length: unexpected
            return UINT64_MAX;

        // Strip the marker bit when the caller wants the plain value; the mask is
        // built as a 64-bit quantity so it matches the width of u64Val.
        if (unPackVal)
            u64Val &= ~(1ULL << (7 - nLeadingZeros));

        // Append the remaining octets, most significant first.
        for (uint8_t i = 0; i < nLeadingZeros; i++)
            u64Val = (u64Val << 8) | static_cast<uint8_t>(bs.GetBits(8));

        if (pcbValLen != nullptr)
            *pcbValLen = nLeadingZeros + 1;

        return u64Val;
    }
    
    /**
     * @brief Read an element header (ID followed by data size) from the bitstream.
     *
     * Stores the results in the ID and Size members. ID is assigned as soon as it
     * has been read, so it is already updated if reading the size then fails.
     *
     * @param bs Bitstream positioned at the start of the element header.
     * @return 0 on success, -1 when either field cannot be decoded.
     */
    virtual int Unpack(CBitstream& bs)
    {
        // Element ID: at most 4 octets, length marker kept as part of the value.
        const uint64_t u64ElementID = UnpackUnsignedIntVal(bs, 4, false);
        if (u64ElementID == UINT64_MAX)
            return -1;

        ID = static_cast<uint32_t>(u64ElementID);

        // Data size: default limits (up to 8 octets), length marker stripped.
        const uint64_t u64DataSize = UnpackUnsignedIntVal(bs);
        if (u64DataSize == UINT64_MAX)
            return -1;

        Size = u64DataSize;

        //printf("ID: 0X%X, Size: %lld(0X%llX)\n", ID, Size, Size);

        return 0;
    }