Avformat of ffmpeg_ open_ Input () analysis process -- Taking mp4 as an example

Summary

avformat_open_input(), which is used to open multimedia data and get some information. Its declaration is located in libavformat / avformat h. Main work

1) Through init_input opens the streaming media data, detects the most appropriate protocol type AVInputFormat of streaming media according to the probe, and sets the read/write/seek related callback through open2

2)read_header reads the media header information and creates AVStream according to the corresponding protocol, and parses the streaming media (MP4 file will be parsed. This paper takes MP4 as an example)

Frame diagram

code analysis

int avformat_open_input(AVFormatContext **ps, const char *filename,
                        AVInputFormat *fmt, AVDictionary **options)
{
    AVFormatContext *s = *ps;
    int i, ret = 0;
    AVDictionary *tmp = NULL;
    ID3v2ExtraMeta *id3v2_extra_meta = NULL;

    if (!s && !(s = avformat_alloc_context())) //If s is not initialized, initialize s first
        return AVERROR(ENOMEM);
    if (!s->av_class) {
        av_log(NULL, AV_LOG_ERROR, "Input context has not been properly allocated by avformat_alloc_context() and is not NULL either\n");
        return AVERROR(EINVAL);
    }
    if (fmt)
        s->iformat = fmt;

    if (options)
        av_dict_copy(&tmp, *options, 0);

    if (s->pb) // must be before any goto fail
        s->flags |= AVFMT_FLAG_CUSTOM_IO;

    if ((ret = av_opt_set_dict(s, &tmp)) < 0)
        goto fail;

    if (!(s->url = av_strdup(filename ? filename : ""))) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

#if FF_API_FORMAT_FILENAME
FF_DISABLE_DEPRECATION_WARNINGS
    av_strlcpy(s->filename, filename ? filename : "", sizeof(s->filename));
FF_ENABLE_DEPRECATION_WARNINGS
#endif
	//	1. Focus here:
    if ((ret = init_input(s, filename, &tmp)) < 0)
        goto fail;
    s->probe_score = ret;

    if (!s->protocol_whitelist && s->pb && s->pb->protocol_whitelist) {
        s->protocol_whitelist = av_strdup(s->pb->protocol_whitelist);
        if (!s->protocol_whitelist) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }

    if (!s->protocol_blacklist && s->pb && s->pb->protocol_blacklist) {
        s->protocol_blacklist = av_strdup(s->pb->protocol_blacklist);
        if (!s->protocol_blacklist) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }

    if (s->format_whitelist && av_match_list(s->iformat->name, s->format_whitelist, ',') <= 0) {
        av_log(s, AV_LOG_ERROR, "Format not on whitelist \'%s\'\n", s->format_whitelist);
        ret = AVERROR(EINVAL);
        goto fail;
    }

    avio_skip(s->pb, s->skip_initial_bytes);

    /* Check filename in case an image number is expected. */
    if (s->iformat->flags & AVFMT_NEEDNUMBER) {
        if (!av_filename_number_test(filename)) {
            ret = AVERROR(EINVAL);
            goto fail;
        }
    }

    s->duration = s->start_time = AV_NOPTS_VALUE;

    /* Allocate private data. */
    if (s->iformat->priv_data_size > 0) {
        if (!(s->priv_data = av_mallocz(s->iformat->priv_data_size))) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
        if (s->iformat->priv_class) {
            *(const AVClass **) s->priv_data = s->iformat->priv_class;
            av_opt_set_defaults(s->priv_data);
            if ((ret = av_opt_set_dict(s->priv_data, &tmp)) < 0)
                goto fail;
        }
    }

    /* e.g. AVFMT_NOFILE formats will not have a AVIOContext */
    if (s->pb)
        ff_id3v2_read_dict(s->pb, &s->internal->id3v2_meta, ID3v2_DEFAULT_MAGIC, &id3v2_extra_meta);


    if (!(s->flags&AVFMT_FLAG_PRIV_OPT) && s->iformat->read_header)
    //Focus 2
        if ((ret = s->iformat->read_header(s)) < 0)
            goto fail;

    if (!s->metadata) {
        s->metadata = s->internal->id3v2_meta;
        s->internal->id3v2_meta = NULL;
    } else if (s->internal->id3v2_meta) {
        int level = AV_LOG_WARNING;
        if (s->error_recognition & AV_EF_COMPLIANT)
            level = AV_LOG_ERROR;
        av_log(s, level, "Discarding ID3 tags because more suitable tags were found.\n");
        av_dict_free(&s->internal->id3v2_meta);
        if (s->error_recognition & AV_EF_EXPLODE)
            return AVERROR_INVALIDDATA;
    }

    if (id3v2_extra_meta) {
        if (!strcmp(s->iformat->name, "mp3") || !strcmp(s->iformat->name, "aac") ||
            !strcmp(s->iformat->name, "tta")) {
            if ((ret = ff_id3v2_parse_apic(s, &id3v2_extra_meta)) < 0)
                goto fail;
            if ((ret = ff_id3v2_parse_chapters(s, &id3v2_extra_meta)) < 0)
                goto fail;
            if ((ret = ff_id3v2_parse_priv(s, &id3v2_extra_meta)) < 0)
                goto fail;
        } else
            av_log(s, AV_LOG_DEBUG, "demuxer does not support additional id3 data, skipping\n");
    }
    ff_id3v2_free_extra_meta(&id3v2_extra_meta);

    if ((ret = avformat_queue_attached_pictures(s)) < 0)
        goto fail;

    if (!(s->flags&AVFMT_FLAG_PRIV_OPT) && s->pb && !s->internal->data_offset)
        s->internal->data_offset = avio_tell(s->pb);

    s->internal->raw_packet_buffer_remaining_size = RAW_PACKET_BUFFER_SIZE;

    update_stream_avctx(s);

    for (i = 0; i < s->nb_streams; i++)
        s->streams[i]->internal->orig_codec_id = s->streams[i]->codecpar->codec_id;

    if (options) {
        av_dict_free(options);
        *options = tmp;
    }
    *ps = s;
    return 0;

fail:
    ff_id3v2_free_extra_meta(&id3v2_extra_meta);
    av_dict_free(&tmp);
    if (s->pb && !(s->flags & AVFMT_FLAG_CUSTOM_IO))
        avio_closep(&s->pb);
    avformat_free_context(s);
    *ps = NULL;
    return ret;
}

avformat_open_input mainly performs two steps:

1)init_input

2)s->iformat->read_header2(s, &tmp2)

1,init_input:

int init_input(AVFormatContext *s, const char *filename,
                      AVDictionary **options)
{
    AVProbeData pd = { filename, NULL, 0 };
    int score = AVPROBE_SCORE_RETRY;
    //1. If AVIOContext is customized, the customized
    if (s->pb) {
        s->flags |= AVFMT_FLAG_CUSTOM_IO;
        //1.2 if AVInputFormat is not defined, ffmpeg will guess
        if (!s->iformat)
            return av_probe_input_buffer2(s->pb, &s->iformat, filename,
                                         s, 0, s->format_probesize);
        else if (s->iformat->flags & AVFMT_NOFILE)
            av_log(s, AV_LOG_WARNING, "Custom AVIOContext makes no sense and "
                                      "will be ignored with AVFMT_NOFILE format.\n");
        return 0;
    }
    //2. If AVInputFormat is specified, it returns directly
    //Or AV if AVInputFormat is not specified_ probe_ input_ Format2 (extension / file type string) to speculate
    if ((s->iformat && s->iformat->flags & AVFMT_NOFILE) ||
        (!s->iformat && (s->iformat = av_probe_input_format2(&pd, 0, &score))))
        return score;
    //3. If the file name or type does not match, open the file, read the content and speculate
    if ((ret = s->io_open(s, &s->pb, filename, AVIO_FLAG_READ | s->avio_flags, options)) < 0)
        return ret;

    if (s->iformat)
        return 0;
    return av_probe_input_buffer2(s->pb, &s->iformat, filename,
                                 s, 0, s->format_probesize);
}

init_input is used to open media data and find the most appropriate protocol type (AVInputFormat) according to the matching rules

There are three steps:

1) If the AVIOContext is customized, the customized pb is used, such as

        unsigned char *aviobuffer=(unsigned char *)av_malloc(VIDEO_AVIO_BUFFER_SIZE);
        avio =avio_alloc_context(aviobuffer, VIDEO_AVIO_BUFFER_SIZE,0,NULL,&ff_read_buffer_func_cb,NULL,NULL);
        ic->pb = avio;
        ic->flags |= AVFMT_FLAG_CUSTOM_IO;

Custom pb = avio

If AVInputFormat is not defined, ffmpeg will guess

if (!s->iformat)
            return av_probe_input_buffer2(s->pb, &s->iformat, filename,
                                         s, 0, s->format_probesize);

2) Returns directly if AVInputFormat is specified, or AV if AVInputFormat is not specified_ probe_ input_ Format2 (extension / file type string) to speculate

3) If the file name or type does not match, open the file, read the content and speculate

-------------------------------------------------------------------------------------------------------------------------------

problem
    avformat_ open_ The cost of detecting ES stream and PS stream by input function is 150 ms and 500 ms respectively. avformat_ open_ AV has been implemented in the input function_ probe_ input_ Call the buffer function to detect the related variables of AVInputFormat structure. So in avformat_ open_ Before the input function, call av_. probe_ input_ After the buffer function, the AVInputFormat structure will not be detected

Optimization direction
Attempt to mask avformat_ open_ The input function directly specifies the input format pInputFormat of the code stream. The code is as follows:
        pInputFormat = av_find_input_format("h264");
        pFormatCtx->iformat = pInputFormat;
If avformat is blocked at this time_ open_ Input, the image is striped. According to the references, the avformat_open_input
The function is to detect the related variables of AVInputFormat structure

Final optimization scheme
Avformat is not masked_ open_ Input function, but specify the bitstream input format of AVInputFormat before calling
code
    AVInputFormat* pInputFormat = NULL;
    AVFormatContext* pFormatContext = NULL;
    pFormatContext = avformat_alloc_context();
    pInputFormat = av_find_input_format("h264");

From here, we can see that we can set the "iformat" attribute on the front end to determine the input format of the stream, so as to reduce the detection time of the stream

--------------------------------------------------------------------------------------------------------------------------------

AVInputFormat *av_probe_input_format3(ff_const59 AVProbeData *pd
, int is_opened, int *score_ret) {
    AVProbeData lpd = *pd;
    const AVInputFormat *fmt1 = NULL;
    ff_const59 AVInputFormat *fmt = NULL;
    int score, score_max = 0;

    //Traverse all multiplexers and demultiplexers
    //Old version demuxer_ The list is located in demuxer_list.c
//The new version is in allformats c
    while ((fmt1 = av_demuxer_iterate(&i))) {//Ftm1 = new version av_format_next(ftm1)
        score = 0;
        //If there is a probe function, call
        if (fmt1->read_probe) {
            score = fmt1->read_probe(&lpd);
        //Otherwise, match the extension (. mp4/.flv)
        } else if (fmt1->extensions) {
            if (av_match_ext(lpd.filename, fmt1->extensions))
                score = AVPROBE_SCORE_EXTENSION;//50
        }
        //Match media type (video/x-flv video/mp4)
        if (av_match_name(lpd.mime_type, fmt1->mime_type)) {
            if (AVPROBE_SCORE_MIME > score) {
                av_log(NULL, AV_LOG_DEBUG, "Probing %s score:%d increased to %d due to MIME type\n", fmt1->name, score, AVPROBE_SCORE_MIME);
                score = AVPROBE_SCORE_MIME;//75
            }
        }
        //Record the highest score
        if (score > score_max) {
            score_max = score;
            fmt       = (AVInputFormat*)fmt1;
        } else if (score == score_max)
            fmt = NULL;
    }
    *score_ret = score_max;

    return fmt;
}

av_ iformat_ The values of next (fmt1), fmt1 - > name are "flac", "flv", "hevc", "hls", etc. can this part be optimized?

Take mp4 as an example, read_probe calls Moov Mov of C_ probe

mov_probe(AVProbeData *p)
{
    int64_t offset;
    uint32_t tag;
    int score = 0;
    int moov_offset = -1;

    /* check file header */
    offset = 0;
    for (;;) {
        /* ignore invalid offset */
        if ((offset + 8) > (unsigned int)p->buf_size)
            break;
        tag = AV_RL32(p->buf + offset + 4);
        switch(tag) {
        /* check for obvious tags */
        case MKTAG('m','o','o','v'):
            moov_offset = offset + 4;
        case MKTAG('m','d','a','t'):
        case MKTAG('p','n','o','t'): /* detect movs with preview pics like ew.mov and april.mov */
        case MKTAG('u','d','t','a'): /* Packet Video PVAuthor adds this and a lot of more junk */
        case MKTAG('f','t','y','p'):å
            if (AV_RB32(p->buf+offset) < 8 &&
                (AV_RB32(p->buf+offset) != 1 ||
                 offset + 12 > (unsigned int)p->buf_size ||
                 AV_RB64(p->buf+offset + 8) == 0)) {
                score = FFMAX(score, AVPROBE_SCORE_EXTENSION);
            } else if (tag == MKTAG('f','t','y','p') &&
                       (   AV_RL32(p->buf + offset + 8) == MKTAG('j','p','2',' ')
                        || AV_RL32(p->buf + offset + 8) == MKTAG('j','p','x',' ')
                    )) {
                score = FFMAX(score, 5);
            } else {
                score = AVPROBE_SCORE_MAX;
            }
            offset = FFMAX(4, AV_RB32(p->buf+offset)) + offset;
            break;
        /* those are more common words, so rate then a bit less */
        case MKTAG('e','d','i','w'): /* xdcam files have reverted first tags */
        case MKTAG('w','i','d','e'):
        case MKTAG('f','r','e','e'):
        case MKTAG('j','u','n','k'):
        case MKTAG('p','i','c','t'):
            score  = FFMAX(score, AVPROBE_SCORE_MAX - 5);
            offset = FFMAX(4, AV_RB32(p->buf+offset)) + offset;
            break;
        case MKTAG(0x82,0x82,0x7f,0x7d):
        case MKTAG('s','k','i','p'):
        case MKTAG('u','u','i','d'):
        case MKTAG('p','r','f','l'):
            /* if we only find those cause probedata is too small at least rate them */
            score  = FFMAX(score, AVPROBE_SCORE_EXTENSION);
            offset = FFMAX(4, AV_RB32(p->buf+offset)) + offset;
            break;
        default:
            offset = FFMAX(4, AV_RB32(p->buf+offset)) + offset;
        }
    }
    if(score > AVPROBE_SCORE_MAX - 50 && moov_offset != -1) {
        /* moov atom in the header - we should make sure that this is not a
         * MOV-packed MPEG-PS */
        offset = moov_offset;

        while(offset < (p->buf_size - 16)){ /* Sufficient space */
               /* We found an actual hdlr atom */
            if(AV_RL32(p->buf + offset     ) == MKTAG('h','d','l','r') &&
               AV_RL32(p->buf + offset +  8) == MKTAG('m','h','l','r') &&
               AV_RL32(p->buf + offset + 12) == MKTAG('M','P','E','G')){
                av_log(NULL, AV_LOG_WARNING, "Found media data tag MPEGå indicating this is a MOV-packed MPEG-PS.\n");
                /* We found a media handler reference atom describing an
                 * MPEG-PS-in-MOV, return a
                 * low score to force expanding the probe window until
                 * mpegps_probe finds what it needs */
                return 5;
            }else
                /* Keep looking */
                offset+=2;
        }
    }

    return score;
}

Take hls as an example. When Name is hls, the corresponding function pointer will point to hls Function pointer of C

av_probe_input_format3 summary:

Traverse all demultiplexers and record the most matched demultiplexer
1. If there is a probe function, call and record the score, otherwise match the extension (50 points)
2. Then match the media type. If it matches and is higher than the previous score, it will be 75 points

Since the debugging routine is a private protocol, the third step will be taken

3) If the file name or type does not match, open the file, read the content and speculate

s->io_ The open function pointer points to io_open_default

static int io_open_default(AVFormatContext *s, AVIOContext **pb,
                           const char *url, int flags, AVDictionary **options)
{
    int loglevel;

    if (!strcmp(url, s->filename) ||
        s->iformat && !strcmp(s->iformat->name, "image2") ||
        s->oformat && !strcmp(s->oformat->name, "image2")
    ) {
        loglevel = AV_LOG_DEBUG;
    } else
        loglevel = AV_LOG_INFO;

    av_log(s, loglevel, "Opening \'%s\' for %s\n", url, flags & AVIO_FLAG_WRITE ? "writing" : "reading");

#if FF_API_OLD_OPEN_CALLBACKS
FF_DISABLE_DEPRECATION_WARNINGS
    if (s->open_cb)
        return s->open_cb(s, pb, url, flags, &s->interrupt_callback, options);
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    return ffio_open_whitelist(pb, url, flags, &s->interrupt_callback, options, s->protocol_whitelist, s->protocol_blacklist);
}

int ffurl_connect(URLContext *uc, AVDictionary **options)
{
    int err;
    AVDictionary *tmp_opts = NULL;
    AVDictionaryEntry *e;

    if (!options)
        options = &tmp_opts;

    // Check that URLContext was initialized correctly and lists are matching if set
    av_assert0(!(e=av_dict_get(*options, "protocol_whitelist", NULL, 0)) ||
               (uc->protocol_whitelist && !strcmp(uc->protocol_whitelist, e->value)));
    av_assert0(!(e=av_dict_get(*options, "protocol_blacklist", NULL, 0)) ||
               (uc->protocol_blacklist && !strcmp(uc->protocol_blacklist, e->value)));

    if (uc->protocol_whitelist && av_match_list(uc->prot->name, uc->protocol_whitelist, ',') <= 0) {
        av_log(uc, AV_LOG_ERROR, "Protocol '%s' not on whitelist '%s'!\n", uc->prot->name, uc->protocol_whitelist);
        return AVERROR(EINVAL);
    }

    if (uc->protocol_blacklist && av_match_list(uc->prot->name, uc->protocol_blacklist, ',') > 0) {
        av_log(uc, AV_LOG_ERROR, "Protocol '%s' on blacklist '%s'!\n", uc->prot->name, uc->protocol_blacklist);
        return AVERROR(EINVAL);
    }

    if (!uc->protocol_whitelist && uc->prot->default_whitelist) {
        av_log(uc, AV_LOG_DEBUG, "Setting default whitelist '%s'\n", uc->prot->default_whitelist);
        uc->protocol_whitelist = av_strdup(uc->prot->default_whitelist);
        if (!uc->protocol_whitelist) {
            return AVERROR(ENOMEM);
        }
    } else if (!uc->protocol_whitelist)
        av_log(uc, AV_LOG_DEBUG, "No default whitelist set\n"); // This should be an error once all declare a default whitelist

    if ((err = av_dict_set(options, "protocol_whitelist", uc->protocol_whitelist, 0)) < 0)
        return err;
    if ((err = av_dict_set(options, "protocol_blacklist", uc->protocol_blacklist, 0)) < 0)
        return err;

    err =
        uc->prot->url_open2 ? uc->prot->url_open2(uc,
                                                  uc->filename,
                                                  uc->flags,
                                                  options) :
        uc->prot->url_open(uc, uc->filename, uc->flags);

    av_dict_set(options, "protocol_whitelist", NULL, 0);
    av_dict_set(options, "protocol_blacklist", NULL, 0);

    if (err)
        return err;
    uc->is_connected = 1;
    /* We must be careful here as ffurl_seek() could be slow,
     * for example for http */
    if ((uc->flags & AVIO_FLAG_WRITE) || !strcmp(uc->prot->name, "file"))
        if (!uc->is_streamed && ffurl_seek(uc, 0, SEEK_SET) < 0)
            uc->is_streamed = 1;
    return 0;
}

uc->prot->url_ Open2 will call open2 of the corresponding protocol according to the protocol. Here, the protocol is the private protocol ijkbds, so ijkbds will be called_ open

ijkbds_open will open the stream and finally UC - > is_ connected = 1

int ffio_open_whitelist(AVIOContext **s, const char *filename, int flags,
                         const AVIOInterruptCB *int_cb, AVDictionary **options,
                         const char *whitelist, const char *blacklist
                        )
{
    URLContext *h;
    int err;

    err = ffurl_open_whitelist(&h, filename, flags, int_cb, options, whitelist, blacklist, NULL);
    if (err < 0)
        return err;
    err = ffio_fdopen(s, h);
    if (err < 0) {
        ffurl_close(h);
        return err;
    }
    return 0;
}

Two steps:

1)ffurl_connect will eventually call open2 to open the corresponding protocol

2)ffio_fdopen will pass avio_alloc_context set read/write/seek related callbacks

int ffio_fdopen(AVIOContext **s, URLContext *h)
{
    AVIOInternal *internal = NULL;
    uint8_t *buffer = NULL;
    int buffer_size, max_packet_size;

    max_packet_size = h->max_packet_size;
    if (max_packet_size) {
        buffer_size = max_packet_size; /* no need to bufferize more than one packet */
    } else {
        buffer_size = IO_BUFFER_SIZE;
    }
    buffer = av_malloc(buffer_size);
    if (!buffer)
        return AVERROR(ENOMEM);

    internal = av_mallocz(sizeof(*internal));
    if (!internal)
        goto fail;

    internal->h = h;
//Set read/write/seek related callback
    *s = avio_alloc_context(buffer, buffer_size, h->flags & AVIO_FLAG_WRITE,
                            internal, io_read_packet, io_write_packet, io_seek);
    if (!*s)
        goto fail;

    (*s)->protocol_whitelist = av_strdup(h->protocol_whitelist);
    if (!(*s)->protocol_whitelist && h->protocol_whitelist) {
        avio_closep(s);
        goto fail;
    }
    (*s)->protocol_blacklist = av_strdup(h->protocol_blacklist);
    if (!(*s)->protocol_blacklist && h->protocol_blacklist) {
        avio_closep(s);
        goto fail;
    }
    (*s)->direct = h->flags & AVIO_FLAG_DIRECT;

    (*s)->seekable = h->is_streamed ? 0 : AVIO_SEEKABLE_NORMAL;
    (*s)->max_packet_size = max_packet_size;
    (*s)->min_packet_size = h->min_packet_size;
    if(h->prot) {
        (*s)->read_pause = io_read_pause;
        (*s)->read_seek  = io_read_seek;

        if (h->prot->url_read_seek)
            (*s)->seekable |= AVIO_SEEKABLE_TIME;
    }
    (*s)->short_seek_get = io_short_seek;
    (*s)->av_class = &ff_avio_class;
    return 0;
fail:
    av_freep(&internal);
    av_freep(&buffer);
    return AVERROR(ENOMEM);
}

2,read_header

The function pointer of mp4 will point to mov Mov of C file_ read_ header

Because this article takes mp4 as an example, let's first understand the file structure of mp4

It can be seen from the figure that these boxes are hierarchical. The eight bytes in the head of each box are fixed. The first four bytes are the size of the box, and the last four bytes are the type of the box, that is, fytp,moov and so on. This information is marked with MOVAtom in ffmpeg:

//In the code, atom is actually the box in MP4 protocol, with a total of 8 bytes
typedef struct MOVAtom {
    uint32_t type;//box type
    int64_t size; /* box total size (excluding the size and type fields) */
} MOVAtom;

Mp4 format stores data in the form of box, including nested boxes. All boxes are composed of size+type+body. In the outermost box, there are mainly three types of boxes: ftyp, moov and mdat. Ftyp is mainly the identification information in MP4 format. moov is the metadata of the sample data of the MP4 file, which is used to describe the size, location, dts, etc. of the sample. mdat is the specific data of the sample. The location of each sample data in the file is obtained through trak in moov. The analysis of mp4 box is actually to read various types of boxes and pass some data in them through file_read reads and assigns values.

static int mov_read_header(AVFormatContext *s)
{
    MOVContext *mov = s->priv_data;  //memset type is av0
    AVIOContext *pb = s->pb;
    int j, err;
    MOVAtom atom = { AV_RL32("root") }; //Create a parent box, including ftyp, moov and mdat, and assign an initial value
    int i;
    ...
    mov->fc = s; //It is convenient to assign a value to the vcmoatextfor writing code
    mov->trak_index = -1;
    /* .mov and .mp4 aren't streamable anyway (only progressive download if moov is before mdat) */
    if (pb->seekable & AVIO_SEEKABLE_NORMAL)
        atom.size = avio_size(pb); //Overall size of source MP4 file
    else
        atom.size = INT64_MAX;

    /* check MOV header */
    //Jump out when you finish reading moov's box, but here you can only read it once because there is a mov_ read_ The default interface will continuously read the nested box down, as long as atom Size has enough data
    do {
        if (mov->moov_retry)
            avio_seek(pb, 0, SEEK_SET);
            //There are nested boxes. Continue to read
        if ((err = mov_read_default(mov, pb, atom)) < 0) {
            av_log(s, AV_LOG_ERROR, "error reading header\n");
            goto fail;
        }
    } while ((pb->seekable & AVIO_SEEKABLE_NORMAL) && !mov->found_moov && !mov->moov_retry++);
    if (!mov->found_moov) { //Is moov finished reading the sign
        av_log(s, AV_LOG_ERROR, "moov atom not found\n");
        err = AVERROR_INVALIDDATA;
        goto fail;
    }
    av_log(mov->fc, AV_LOG_TRACE, "on_parse_exit_offset=%"PRId64"\n", avio_tell(pb));

    if (pb->seekable & AVIO_SEEKABLE_NORMAL) {
        ......
    }

    /* copy timecode metadata from tmcd tracks to the related video streams */
    for (i = 0; i < s->nb_streams; i++) {
        ...
    }
    export_orphan_timecode(s);

    //S - > streams is to read the memory opened by trak box and assign a value
    for (i = 0; i < s->nb_streams; i++) {
        AVStream *st = s->streams[i];
        MOVStreamContext *sc = st->priv_data;
        fix_timescale(mov, sc); //The time scaling ratio in mdhd is generally 1
        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
            st->codecpar->codec_id   == AV_CODEC_ID_AAC) {
            st->internal->skip_samples = sc->start_pad;
        }
        if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && sc->nb_frames_for_fps > 0 && sc->duration_for_fps > 0)
            av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den,
                      sc->time_scale*(int64_t)sc->nb_frames_for_fps, sc->duration_for_fps, INT_MAX);
        if (st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) {
            if (st->codecpar->width <= 0 || st->codecpar->height <= 0) {
                st->codecpar->width  = sc->width;
                st->codecpar->height = sc->height;
            }
            if (st->codecpar->codec_id == AV_CODEC_ID_DVD_SUBTITLE) {
                if ((err = mov_rewrite_dvd_sub_extradata(st)) < 0)
                    goto fail;
            }
        }
        ...
    }
    if (mov->trex_data) {
        ...
    }
    if (mov->use_mfra_for > 0) {
        ...
    }
    for (i = 0; i < mov->bitrates_count && i < s->nb_streams; i++) {
        if (mov->bitrates[i]) {
            s->streams[i]->codecpar->bit_rate = mov->bitrates[i];//Calculate bit rate
        }
    }

    ff_rfps_calculate(s);

    for (i = 0; i < s->nb_streams; i++) {
        AVStream *st = s->streams[i];
        MOVStreamContext *sc = st->priv_data;

        switch (st->codecpar->codec_type) {
        case AVMEDIA_TYPE_AUDIO:
            err = ff_replaygain_export(st, s->metadata);
            if (err < 0)
                goto fail;
            break;
        case AVMEDIA_TYPE_VIDEO:
            ...
    }
    ff_configure_buffers_for_index(s, AV_TIME_BASE);

    for (i = 0; i < mov->frag_index.nb_items; i++)
        if (mov->frag_index.item[i].moof_offset <= mov->fragment.moof_offset)
            mov->frag_index.item[i].headers_read = 1;

    return 0;
fail:
    mov_read_close(s);
    return err;
}
//Register all interfaces that parse atom together
typedef struct MOVParseTableEntry {
    uint32_t type;
    int (*parse)(MOVContext *ctx, AVIOContext *pb, MOVAtom atom);
} MOVParseTableEntry;

static const MOVParseTableEntry mov_default_parse_table[] = {
{ MKTAG('f','t','y','p'), mov_read_ftyp },
{ MKTAG('t','r','a','k'), mov_read_trak },
{ MKTAG('t','r','a','f'), mov_read_default },
{ MKTAG('s','t','s','c'), mov_read_stsc },
{ MKTAG('s','t','s','d'), mov_read_stsd }, /* sample description */
{ MKTAG('s','t','s','s'), mov_read_stss }, /* sync sample */
{ MKTAG('s','t','s','z'), mov_read_stsz }, /* sample size */
{ MKTAG('s','t','t','s'), mov_read_stts },
{ MKTAG('s','t','z','2'), mov_read_stsz }, /* compact sample size */
.....
}

//This interface is the most important interface, which is mainly responsible for reading nested box es with circular code, as long as atom Enough data
//For example, Moov - > Trak - > mdia - > stbl - > STSD, stts, stss, ctts, stco, etc
//So it's mov_ read_ Mov is executed only once in the header interface_ read_ Default is OK, which is to cycle through the nested box starting from the created parent box
static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
    int64_t total_size = 0;//Read an atom (whether nested or not), the number of bytes that have been read
    MOVAtom a;
    int i;

    if (c->atom_depth > 10) {
        av_log(c->fc, AV_LOG_ERROR, "Atoms too deeply nested\n");
        return AVERROR_INVALIDDATA;
    }
    c->atom_depth ++; //Number of atom read layers

    if (atom.size < 0)
        atom.size = INT64_MAX;
    while (total_size <= atom.size - 8 && !avio_feof(pb)) {
        int (*parse)(MOVContext*, AVIOContext*, MOVAtom) = NULL;
        a.size = atom.size;
        a.type=0;
        if (atom.size >= 8) {
            a.size = avio_rb32(pb);//avio_rb32 reads the four byte function, and the box size (size+type+body size)
            a.type = avio_rl32(pb);//avio_rb32 read four byte function, which is of type box
            //Type comparison
            if (((a.type == MKTAG('f','r','e','e') && c->moov_retry) ||
                  a.type == MKTAG('h','o','o','v')) &&
                a.size >= 8 &&
                c->fc->strict_std_compliance < FF_COMPLIANCE_STRICT) {
                uint32_t type;
                avio_skip(pb, 4);
                type = avio_rl32(pb);
                avio_seek(pb, -8, SEEK_CUR);
                if (type == MKTAG('m','v','h','d') ||
                    type == MKTAG('c','m','o','v')) {
                    av_log(c->fc, AV_LOG_ERROR, "Detected moov in a free or hoov atom.\n");
                    a.type = MKTAG('m','o','o','v');
                }
            }
            if (atom.type != MKTAG('r','o','o','t') &&
                atom.type != MKTAG('m','o','o','v')) {
                if (a.type == MKTAG('t','r','a','k') ||
                    a.type == MKTAG('m','d','a','t')) {
                    av_log(c->fc, AV_LOG_ERROR, "Broken file, trak/mdat not at top-level\n");
                    avio_skip(pb, -8);
                    c->atom_depth --;
                    return 0;
                }
            }
            total_size += 8; //Read 8 bytes at a time, of which 4 bytes are the length of box and 4 bytes are the tpye of box
            if (a.size == 1 && total_size + 8 <= atom.size) { /* 64 bit extended size */
                a.size = avio_rb64(pb) - 8;
                total_size += 8;
            }
        }
        av_log(c->fc, AV_LOG_TRACE, "type:'%s' parent:'%s' sz: %"PRId64" %"PRId64" %"PRId64"\n",
               av_fourcc2str(a.type), av_fourcc2str(atom.type), a.size, total_size, atom.size);
        if (a.size == 0) {
            a.size = atom.size - total_size + 8;
        }
        a.size -= 8;
        if (a.size < 0)
            break;
        a.size = FFMIN(a.size, atom.size - total_size);

       //Through mov_ default_ parse_ Compare table with type and assign value to the corresponding resolution box interface
        for (i = 0; mov_default_parse_table[i].type; i++)
            if (mov_default_parse_table[i].type == a.type) {
                parse = mov_default_parse_table[i].parse;
                break;//Find the corresponding box and exit the comparison
            }

        // container is user data
        if (!parse && (atom.type == MKTAG('u','d','t','a') ||
                       atom.type == MKTAG('i','l','s','t')))
            parse = mov_read_udta_string;

        // Supports parsing the QuickTime Metadata Keys.
        // https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/Metadata/Metadata.html
        if (!parse && c->found_hdlr_mdta &&
            atom.type == MKTAG('m','e','t','a') &&
            a.type == MKTAG('k','e','y','s') &&
            c->meta_keys_count == 0) {
            parse = mov_read_keys;
        }

        if (!parse) { /* If the corresponding type is not found, skip the atom data skip leaf atoms data */
            avio_skip(pb, a.size);
        } else {
            int64_t start_pos = avio_tell(pb);//The offset between the current file pointer position and the first address of the file. In order to verify whether the box or atom has been read, for example, start for the first time_ pos=8
            int64_t left;
            int err = parse(c, pb, a);//parse is a function pointer. If tpye=ftyp, MOV will be called_ read_ ftyp
            if (err < 0) {
                c->atom_depth --;
                return err;
            }
            if (c->found_moov && c->found_mdat &&
                ((!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->frag_index.complete) ||
                 start_pos + a.size == avio_size(pb))) {
                if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->frag_index.complete)
                    c->next_root_atom = start_pos + a.size;
                c->atom_depth --;
                return 0;
            }
            left = a.size - avio_tell(pb) + start_pos;//Verify whether the box or atom has been read; avio_ The offset of tell relative to the starting position of the file. The first execution: a.size = 16,start_pos = 8,avio_tell(pb) = 24,left = 0
            if (left > 0) /* skip garbage at atom end */
                avio_skip(pb, left);
            else if (left < 0) {
                av_log(c->fc, AV_LOG_WARNING,
                       "overread end of atom '%s' by %"PRId64" bytes\n",
                       av_fourcc2str(a.type), -left);
                avio_seek(pb, left, SEEK_CUR);
            }
        }
//total_size is the total size read in a cycle
        total_size += a.size;//An atom has been read
    }

    if (total_size < atom.size && atom.size < 0x7ffff)
        avio_skip(pb, atom.size - total_size);

    c->atom_depth --;
    return 0;
}

mov_ read_ Work of header

  • Check the validity of parameters.
    • Such as decryption_ key_ Range of len.
  • Call mov_read_default performs box parsing under root.

among

int (*parse)(MOVContext*, AVIOContext*, MOVAtom) = NULL

This is a function pointer, which will call the relevant parsing function according to the type of type. For example, when the type of box is ftyp, MOV will be called_ read_ ftyp

/* read major brand, minor version and compatible brands and store them as metadata */
static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
    uint32_t minor_ver;
    int comp_brand_size;
    char* comp_brands_str;
    uint8_t type[5] = {0};
    int ret = ffio_read_size(pb, type, 4);
    if (ret < 0)
        return ret;

    if (strcmp(type, "qt  "))
        c->isom = 1;
    av_log(c->fc, AV_LOG_DEBUG, "ISO: File Type Major Brand: %.4s\n",(char *)&type);
    av_dict_set(&c->fc->metadata, "major_brand", type, 0);
    minor_ver = avio_rb32(pb); /* minor version */
    av_dict_set_int(&c->fc->metadata, "minor_version", minor_ver, 0);

    comp_brand_size = atom.size - 8;
    if (comp_brand_size < 0)
        return AVERROR_INVALIDDATA;
    comp_brands_str = av_malloc(comp_brand_size + 1); /* Add null terminator */
    if (!comp_brands_str)
        return AVERROR(ENOMEM);

    ret = ffio_read_size(pb, comp_brands_str, comp_brand_size);
    if (ret < 0) {
        av_freep(&comp_brands_str);
        return ret;
    }
    comp_brands_str[comp_brand_size] = 0;
    av_dict_set(&c->fc->metadata, "compatible_brands", comp_brands_str, 0);//comp_brands_str = "ios4hvc1"
    av_freep(&comp_brands_str);

    return 0;
}

Since ftyp is not nested, MOV is not called_ read_ Default, and moov will have nesting

The length of ftyp is 0x18, which corresponds to avio_tell(pb) = 24, the length of moov is 0x00110b48, i.e. 1117000, so the real data address of mdat is ftyp + moov + 8 (four bytes are the length of mdat and four bytes are the tpye of mdat) = 24 + 1117000 + 8 = 1117032

static int mov_read_moov(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
    int ret;

    if (c->found_moov) {
        av_log(c->fc, AV_LOG_WARNING, "Found duplicated MOOV Atom. Skipped it\n");
        avio_skip(pb, atom.size);
        return 0;
    }

    if ((ret = mov_read_default(c, pb, atom)) < 0)
        return ret;
    /* We have parsed the 'moov' box. As long as we find 'mdat', we can terminate the parsing */
    /* Therefore, if we pass through the network, we will not parse the whole file */
    c->found_moov=1;
    return 0; /* now go for mdat */
}

c->found_ moov && c->found_ When MDAT is 1

            if (c->found_moov && c->found_mdat &&
                ((!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->frag_index.complete) ||
                 start_pos + a.size == avio_size(pb))) {
                if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->frag_index.complete)
                    c->next_root_atom = start_pos + a.size;
                c->atom_depth --;
                return 0;
            }

Discovery start_pos + a.size == avio_ The size (PB) condition does not meet the requirements and will not exit the parsing. Why do you need this condition? Can you remove this condition?

Because the box doesn't really read data, it just sets the flag to 1

static int mov_read_mdat(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
    if (atom.size == 0) /* wrong one (MP4) */
        return 0;
    c->found_mdat=1;
    return 0; /* now go for moov */
}

So you'll come back

left = a.size - avio_tell(pb) + start_pos;//Verify that the box or atom has been read
            if (left > 0) /* skip garbage at atom end */
                avio_skip(pb, left);//When starting the broadcast, it will first seek to the end of the file

 

Keywords: ffmpeg ijkPlayer

Added by th3void on Thu, 10 Feb 2022 17:48:32 +0200