Search code examples
c video ffmpeg video-streaming video-processing

How to identify mpegts file?


How can I identify MPEG-TS files? I copied the following code from FFmpeg, and it works well on a lot of files, but it also identifies a plain-text file as an MPEG-TS video. The beginning of the problematic file is shown further below.

This is the code I use:

/* Size in bytes of the buffer that main() hands to the probe.
 * Parenthesized so the macro expands as one value inside larger expressions. */
#define MPEGTS_MIN_BUF_SIZE  (1024 * 4)

/**
 * Swap the two bytes of a 16-bit value.
 *
 * Declared with the standard `static inline` rather than the MSVC-only
 * `__forceinline`, and without a `const` qualifier on the by-value return
 * type (the qualifier has no effect there).
 *
 * @param x value whose low and high bytes are exchanged
 * @return x with its two bytes swapped
 */
static inline uint16_t av_bswap16(uint16_t x)
{
    /* x is promoted to int for the shifts; the cast drops the bits above 16. */
    return (uint16_t)((x >> 8) | (x << 8));
}

/*
 * Read a 16-bit big-endian value from the two bytes starting at pointer x.
 * The caller passes a byte pointer (e.g. `buf + 1`), so the macro must
 * dereference it; forwarding the pointer itself to a byte-swap function
 * would operate on the address instead of on the data.
 * x is evaluated twice.
 */
#define AV_RB16(x) \
    ((uint16_t)((((const uint8_t*)(x))[0] << 8) | ((const uint8_t*)(x))[1]))

/* Swap the low two bytes of x. Fully parenthesized so it composes safely
 * with surrounding operators; x is evaluated twice. */
#define bswap16(x) (((x) >> 8) | (((x) & 255) << 8)) // https://stackoverflow.com/a/648215/7206675

/*
 * Generic min/max helpers. They are plain textual macros, so each argument
 * may be evaluated more than once: do not pass expressions with side effects.
 */
/* Larger of a and b; yields b when the two compare equal. */
#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
/* Largest of a, b and c. */
#define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
/* Smaller of a and b; yields a when the two compare equal. */
#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
/* Smallest of a, b and c. */
#define FFMIN3(a,b,c) FFMIN(FFMIN(a,b),c)

/* Fallback capacity of the per-offset histogram, used only when the FFmpeg
 * constant is not already defined by an included header. 204 matches the
 * largest packet size FFmpeg probes (TS_FEC_PACKET_SIZE). */
#ifndef TS_MAX_PACKET_SIZE
#define TS_MAX_PACKET_SIZE 204
#endif

/**
 * Score how strongly a buffer looks like a sequence of fixed-size packets
 * that each begin with the byte 0x47.
 *
 * Every 0x47 byte is treated as a candidate packet start. Candidates are
 * counted per offset modulo packet_size; the most populated offset gives
 * the base score, and candidates beyond ten times that count reduce it.
 *
 * @param buf         bytes to scan
 * @param size        number of readable bytes in buf
 * @param packet_size packet length to test, 1..TS_MAX_PACKET_SIZE
 * @param probe       when non-zero, a candidate only counts if its 13-bit
 *                    PID is 0x1FFF or the 0x30 bits of its fourth byte
 *                    (adaptation field control) are not both zero
 * @return the score; 0 for a NULL buffer or an out-of-range packet_size
 */
static int analyze2(const uint8_t* buf, int size, int packet_size,
    int probe)
{
    int stat[TS_MAX_PACKET_SIZE];
    int stat_all = 0;
    int best_score = 0;
    int excess;
    int i;

    /* stat[] has TS_MAX_PACKET_SIZE slots and i % packet_size needs a
     * positive divisor, so reject anything outside that range. */
    if (!buf || packet_size <= 0 || packet_size > TS_MAX_PACKET_SIZE)
    {
        return 0;
    }

    memset(stat, 0, (size_t)packet_size * sizeof(*stat));

    for (i = 0; i < size - 3; i++)
    {
        if (buf[i] == 0x47)
        {
            /* Read the PID from the candidate packet itself (bytes i+1 and
             * i+2), not from the start of the buffer. */
            int pid = ((buf[i + 1] << 8) | buf[i + 2]) & 0x1FFF;
            int asc = buf[i + 3] & 0x30;
            if (!probe || pid == 0x1FFF || asc)
            {
                int x = i % packet_size;
                stat[x]++;
                stat_all++;
                if (stat[x] > best_score)
                {
                    best_score = stat[x];
                }
            }
        }
    }

    /* Candidates in excess of 10x the best offset lower the score. */
    excess = stat_all - 10 * best_score;
    return best_score - (excess > 0 ? excess : 0) / 10;
}

/**
 * Input bundle given to a format probe function.
 */
typedef struct AVProbeData {
    const char* filename;

    /**
     * Data to inspect. The allocation must extend AVPROBE_PADDING_SIZE
     * bytes past buf_size, and those extra bytes must be zero.
     */
    unsigned char* buf;

    /** Number of data bytes in buf, not counting the extra padding. */
    int buf_size;

    /** MIME type of the data, when it is known. */
    const char* mime_type;
} AVProbeData;

/** Highest score a probe can report. */
#define AVPROBE_SCORE_MAX 100

/**
 * Estimate how likely the probe buffer is to contain an MPEG transport stream.
 *
 * The buffer is examined in blocks of up to CHECK_BLOCK packets. Each block
 * is scored with analyze2() for the three packet sizes TS_PACKET_SIZE,
 * TS_DVHS_PACKET_SIZE and TS_FEC_PACKET_SIZE, and the best of the three is
 * kept. The summed and the maximum block scores are then normalized and
 * mapped to a probe score.
 *
 * @param p probe input; only buf and buf_size are read
 * @return 0 when the buffer is shorter than one TS_FEC_PACKET_SIZE packet or
 *         nothing matches, 2 for a weak match on a short buffer, otherwise a
 *         value based on AVPROBE_SCORE_MAX or AVPROBE_SCORE_MAX / 2
 */
static int mpegts_probe(const AVProbeData* p)
{
#define CHECK_COUNT 10
#define CHECK_BLOCK 100
    const int check_count = p->buf_size / TS_FEC_PACKET_SIZE;
    int sumscore = 0;
    int maxscore = 0;
    int first;

    if (check_count == 0) {
        return 0;
    }

    for (first = 0; first < check_count; first += CHECK_BLOCK) {
        const int left = FFMIN(check_count - first, CHECK_BLOCK);
        const int plain = analyze2(p->buf + TS_PACKET_SIZE * first,
                                   TS_PACKET_SIZE * left, TS_PACKET_SIZE, 1);
        const int dvhs = analyze2(p->buf + TS_DVHS_PACKET_SIZE * first,
                                  TS_DVHS_PACKET_SIZE * left, TS_DVHS_PACKET_SIZE, 1);
        const int fec = analyze2(p->buf + TS_FEC_PACKET_SIZE * first,
                                 TS_FEC_PACKET_SIZE * left, TS_FEC_PACKET_SIZE, 1);
        const int block_best = FFMAX3(plain, dvhs, fec);

        sumscore += block_best;
        if (block_best > maxscore) {
            maxscore = block_best;
        }
    }

    /* Normalize both figures to a scale where CHECK_COUNT is a full match. */
    sumscore = sumscore * CHECK_COUNT / check_count;
    maxscore = maxscore * CHECK_COUNT / CHECK_BLOCK;

    /* Plenty of packets and a good average: full confidence. */
    if (check_count > CHECK_COUNT && sumscore > 6) {
        return AVPROBE_SCORE_MAX + sumscore - CHECK_COUNT;
    }

    /* Enough packets and either a good average or one very good block. */
    if (check_count >= CHECK_COUNT && (sumscore > 6 || maxscore > 6)) {
        return AVPROBE_SCORE_MAX / 2 + sumscore - CHECK_COUNT;
    }

    /* Too few packets to be sure: report a good average as a weak hint. */
    return sumscore > 6 ? 2 : 0;
}

/**
 * Demo driver: fill a fixed-size buffer through ReadMyTestFile() and run
 * the MPEG-TS probe over it.
 *
 * NOTE(review): ReadMyTestFile() is declared elsewhere; it is assumed to
 * write at most sizeof(buf) bytes into the buffer and its result is not
 * reported here — confirm against its definition.
 */
int main(void)
{
    BOOL IsMpegtsVideo = FALSE;
    /* Zero-filled so bytes the reader does not write still have a defined value. */
    char buf[MPEGTS_MIN_BUF_SIZE] = { 0 };
    /* Zero-initialized so filename and mime_type are NULL rather than indeterminate. */
    AVProbeData p = { 0 };
    int score;

    ReadMyTestFile(buf);

    /* AVProbeData.buf is unsigned char*; the conversion from char* needs a cast. */
    p.buf = (unsigned char*)buf;
    p.buf_size = (int)sizeof(buf);
    score = mpegts_probe(&p);

    IsMpegtsVideo = score > 0;

    return 0;
}

This is the beginning of the file that was mistakenly identified as a video:

#ifdef TARGET_ABI32
/*
 * Linux N32 syscalls are in the range from 6000 to 6999.
 */
#define TARGET_NR_Linux                 6000
#define TARGET_NR_read                  (TARGET_NR_Linux +   0)
#define TARGET_NR_write                 (TARGET_NR_Linux +   1)
#define TARGET_NR_open                  (TARGET_NR_Linux +   2)
#define TARGET_NR_close                 (TARGET_NR_Linux +   3)
#define TARGET_NR_stat                  (TARGET_NR_Linux +   4)
#define TARGET_NR_fstat                 (TARGET_NR_Linux +   5)
#define TARGET_NR_lstat                 (TARGET_NR_Linux +   6)
#define TARGET_NR_poll                  (TARGET_NR_Linux +   7)
#define TARGET_NR_lseek                 (TARGET_NR_Linux +   8)
#define TARGET_NR_mmap                  (TARGET_NR_Linux +   9)
#define TARGET_NR_mprotect              (TARGET_NR_Linux +  10)
#define TARGET_NR_munmap                (TARGET_NR_Linux +  11)
#define TARGET_NR_brk                   (TARGET_NR_Linux +  12)
#define TARGET_NR_rt_sigaction          (TARGET_NR_Linux +  13)
#define TARGET_NR_rt_sigprocmask        (TARGET_NR_Linux +  14)
#define TARGET_NR_ioctl                 (TARGET_NR_Linux +  15)
#define TARGET_NR_pread64               (TARGET_NR_Linux +  16)
#define TARGET_NR_pwrite64              (TARGET_NR_Linux +  17)
#define TARGET_NR_readv                 (TARGET_NR_Linux +  18)
#define TARGET_NR_writev                (TARGET_NR_Linux +  19)
#define TARGET_NR_access                (TARGET_NR_Linux +  20)
#define TARGET_NR_pipe                  (TARGET_NR_Linux +  21)
#define TARGET_NR__newselect            (TARGET_NR_Linux +  22)
#define TARGET_NR_sched_yield           (TARGET_NR_Linux +  23)
#define TARGET_NR_mremap                (TARGET_NR_Linux +  24)
#define TARGET_NR_msync                 (TARGET_NR_Linux +  25)
#define TARGET_NR_mincore               (TARGET_NR_Linux +  26)
#define TARGET_NR_madvise               (TARGET_NR_Linux +  27)
#define TARGET_NR_shmget                (TARGET_NR_Linux +  28)
#define TARGET_NR_shmat                 (TARGET_NR_Linux +  29)
#define TARGET_NR_shmctl                (TARGET_NR_Linux +  30)
#define TARGET_NR_dup                   (TARGET_NR_Linux +  31)
#define TARGET_NR_dup2                  (TARGET_NR_Linux +  32)

How can I improve my code so that it no longer recognizes the file above as a video, while still recognizing files that really are MPEG-TS videos?


Solution

  • You need to check the PID (packet identifier) field, which associates each packet with a particular stream. The PID can take many valid values, but a real transport stream starts with packets carrying well-known PIDs such as PatPid or SdtPid. If none of these values appear in the first packets, the file is probably not a video.

    /// <summary>
    /// Well-known packet identifier (PID) values to look for in the first packets.
    /// </summary>
    public enum PidType
    {
        /// <summary>Program Association Table.</summary>
        PatPid = 0x0000,

        /// <summary>Conditional Access Table.</summary>
        CatPid = 0x0001,

        /// <summary>Transport Stream Description Table.</summary>
        TsDescPid = 0x0002,

        /// <summary>Network Information Table.</summary>
        NitPid = 0x0010,

        /// <summary>Service Description Table.</summary>
        SdtPid = 0x0011,

        /// <summary>Event Information Table.</summary>
        EitPid = 0x0012,

        /// <summary>Null (stuffing) packets.</summary>
        NullPid = 0x1FFF
    }