Analysis of audio and video synchronization strategy for ijkplayer

Audio/video synchronization is a core topic for any player, and it is also a common interview question. As is well known, there are three candidate synchronization clocks (audio, video and external), and the audio clock is used as the master clock by default. Interviewers, however, like to ask about variations: What if the audio of a live stream lags behind or is interrupted? If there is no audio stream, which clock is used as the master clock? What if there are two audio tracks (for example, the original vocals and an accompaniment played together)? What if the video timestamps lag behind or run ahead, and how should different degrees of lag be handled? Is synchronization affected when the playback speed is changed?

To summarize the audio and video synchronization problems:

1. Handling video timestamps that lag behind or run ahead;

2. Handling variable-speed (e.g. double-speed) playback;

3. Handling playback of two audio tracks;

4. Handling streams that have no audio;

5. Handling audio that lags behind or is interrupted;

We use ijkplayer's ff_ffplay.c for the analysis. The basic clock functions are get_clock(), set_clock(), set_clock_at() and set_clock_speed(). The first three are shown below (set_clock_speed() is shown later, together with the speed check):

/*
 * Return the current value of clock `c`.
 *
 *  - NAN    when c->serial no longer matches *c->queue_serial;
 *  - c->pts when the clock is paused;
 *  - otherwise pts_drift plus the current av_gettime_relative() time
 *    (divided by 1000000.0), minus the time elapsed since last_updated
 *    scaled by (1.0 - speed). With speed == 1.0 that correction term is zero.
 */
static double get_clock(Clock *c)
{
    double now;

    if (*c->queue_serial != c->serial)
        return NAN;

    if (c->paused)
        return c->pts;

    now = av_gettime_relative() / 1000000.0;
    return c->pts_drift + now - (now - c->last_updated) * (1.0 - c->speed);
}

/*
 * Anchor clock `c` to presentation time `pts` as observed at `time`.
 *
 * Stores the serial and the update time, then records pts and its offset
 * from `time` (pts_drift), which get_clock() later adds to the current time.
 */
static void set_clock_at(Clock *c, double pts, int serial, double time)
{
    c->serial       = serial;
    c->last_updated = time;
    c->pts          = pts;
    /* computed from the stored field, exactly as the clock will read it back */
    c->pts_drift    = c->pts - time;
}

/*
 * Anchor clock `c` to `pts` using the current av_gettime_relative() value
 * (divided by 1000000.0) as the update time.
 */
static void set_clock(Clock *c, double pts, int serial)
{
    set_clock_at(c, pts, serial, av_gettime_relative() / 1000000.0);
}

Next, determine the master clock type and read the master clock. People are more sensitive to glitches in sound than in pictures, which is why the audio clock is generally used as the master clock by default. The selection rules are: if the video clock is configured as the master, use the video clock when a video stream exists, otherwise fall back to the audio clock; if the audio clock is configured as the master, use the audio clock when an audio stream exists, otherwise fall back to the external clock; in all other cases, use the external clock. The code is as follows:

/*
 * Resolve which clock actually acts as master for `is`.
 *
 *  - video master requested: video if is->video_st is set, else audio;
 *  - audio master requested: audio if is->audio_st is set, else external;
 *  - anything else:          external.
 */
static int get_master_sync_type(VideoState *is) {
    switch (is->av_sync_type) {
    case AV_SYNC_VIDEO_MASTER:
        return is->video_st ? AV_SYNC_VIDEO_MASTER : AV_SYNC_AUDIO_MASTER;
    case AV_SYNC_AUDIO_MASTER:
        return is->audio_st ? AV_SYNC_AUDIO_MASTER : AV_SYNC_EXTERNAL_CLOCK;
    default:
        return AV_SYNC_EXTERNAL_CLOCK;
    }
}

/*
 * Return the current value of whichever clock get_master_sync_type()
 * selects: vidclk, audclk, or (for any other result) extclk.
 * The value comes straight from get_clock(), so it may be NAN.
 */
static double get_master_clock(VideoState *is)
{
    const int master = get_master_sync_type(is);

    if (master == AV_SYNC_VIDEO_MASTER)
        return get_clock(&is->vidclk);
    if (master == AV_SYNC_AUDIO_MASTER)
        return get_clock(&is->audclk);
    return get_clock(&is->extclk);
}

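Next, set and check the clock speed. set_clock_speed() changes the rate at which a clock advances. check_external_clock_speed() uses it to slow the external clock down when the packet queues are running low, speed it up when they are full, and otherwise step it back towards normal speed (1.0). The code is as follows: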

/*
 * Change the speed of clock `c`.
 *
 * The clock is first re-anchored at its current value (read with the old
 * speed still in effect), so the new speed only applies from this moment on.
 */
static void set_clock_speed(Clock *c, double speed)
{
    double current = get_clock(c);

    set_clock(c, current, c->serial);
    c->speed = speed;
}

/*
 * Adjust the external clock speed from the packet-queue fill levels.
 *
 *  - A present stream (index >= 0) with at most EXTERNAL_CLOCK_MIN_FRAMES
 *    packets queued: decrease the speed by one step, not below
 *    EXTERNAL_CLOCK_SPEED_MIN.
 *  - Every stream either absent (index < 0) or above
 *    EXTERNAL_CLOCK_MAX_FRAMES packets: increase the speed by one step,
 *    not above EXTERNAL_CLOCK_SPEED_MAX.
 *  - Otherwise, if the speed is not 1.0, move it one step towards 1.0.
 */
static void check_external_clock_speed(VideoState *is) {
    double speed;

    const int starving =
        (is->video_stream >= 0 && is->videoq.nb_packets <= EXTERNAL_CLOCK_MIN_FRAMES) ||
        (is->audio_stream >= 0 && is->audioq.nb_packets <= EXTERNAL_CLOCK_MIN_FRAMES);
    if (starving) {
        set_clock_speed(&is->extclk, FFMAX(EXTERNAL_CLOCK_SPEED_MIN, is->extclk.speed - EXTERNAL_CLOCK_SPEED_STEP));
        return;
    }

    const int flooded =
        (is->video_stream < 0 || is->videoq.nb_packets > EXTERNAL_CLOCK_MAX_FRAMES) &&
        (is->audio_stream < 0 || is->audioq.nb_packets > EXTERNAL_CLOCK_MAX_FRAMES);
    if (flooded) {
        set_clock_speed(&is->extclk, FFMIN(EXTERNAL_CLOCK_SPEED_MAX, is->extclk.speed + EXTERNAL_CLOCK_SPEED_STEP));
        return;
    }

    speed = is->extclk.speed;
    if (speed == 1.0)
        return;
    /* (1.0 - speed) / fabs(1.0 - speed) is the sign of the distance to 1.0 */
    set_clock_speed(&is->extclk, speed + EXTERNAL_CLOCK_SPEED_STEP * (1.0 - speed) / fabs(1.0 - speed));
}

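During audio playback, the audio clock is updated each time the audio callback runs with a valid audio timestamp, and the external clock is then synchronized to the audio clock so that it can take over if the audio clock lags behind or becomes abnormal. The specific code is in the sdl_audio_callback() method: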

/*
 * Audio output callback (excerpt: part of the body is elided as "......").
 *
 * The visible tail refreshes the audio clock, re-syncs the external clock to
 * it, and posts FFP_MSG_AUDIO_RENDERING_START once.
 *
 * NOTE(review): `is` and `ffp` are not declared in the visible lines; they are
 * presumably derived from `opaque` in the elided section - confirm against
 * the full source.
 */
static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
{
    ......
    /* bytes between audio_buf_index and audio_buf_size */
    is->audio_write_buf_size = is->audio_buf_size - is->audio_buf_index;
    /* Let's assume the audio driver that is used by SDL has two periods. */
    /* NOTE(review): the expression below subtracts the pending buffer bytes and
       the SDL_AoutGetLatencySeconds() value; no "two periods" term is visible
       here - the comment above may be stale. */
    if (!isnan(is->audio_clock)) {
        /* clock value = audio_clock
                         - (pending bytes / bytes_per_sec)
                         - latency returned by SDL_AoutGetLatencySeconds(),
           anchored at audio_callback_time / 1000000.0
           (presumably microseconds converted to seconds - confirm) */
        set_clock_at(&is->audclk, is->audio_clock - (double)(is->audio_write_buf_size) / is->audio_tgt.bytes_per_sec - SDL_AoutGetLatencySeconds(ffp->aout), is->audio_clock_serial, ffp->audio_callback_time / 1000000.0);
        // sync external clock to audio clock
        sync_clock_to_slave(&is->extclk, &is->audclk);
    }
    /* post the "audio rendering started" message only on the first pass */
    if (!ffp->first_audio_frame_rendered) {
        ffp->first_audio_frame_rendered = 1;
        ffp_notify_msg1(ffp, FFP_MSG_AUDIO_RENDERING_START);
    }
}

During video playback, it will check the external clock speed, calculate the target delay time, and update the pts display timestamp of the video.

When calculating the delay, if the master clock is not the video clock, the difference diff between the video clock and the master clock is calculated. diff is then compared with sync_threshold, and the delay is adjusted accordingly:

/*
 * Compute how long the current frame should stay on screen.
 *
 * `delay` is the nominal frame duration. When video is not the master clock,
 * it is corrected by the video-minus-master difference `diff`, provided diff
 * is a number and |diff| < AV_NOSYNC_THRESHOLD:
 *   - video behind by at least the threshold: delay + diff, not below 0;
 *   - video ahead by at least the threshold: delay + diff when delay exceeds
 *     AV_SYNC_FRAMEDUP_THRESHOLD, otherwise 2 * delay.
 * The threshold is `delay` clamped to
 * [AV_SYNC_THRESHOLD_MIN, AV_SYNC_THRESHOLD_MAX].
 *
 * If `ffp` is non-NULL, the resulting delay and diff are stored in ffp->stat.
 * Returns the (possibly adjusted) delay.
 */
static double compute_target_delay(FFPlayer *ffp, double delay, VideoState *is)
{
    double diff = 0;

    /* only a slave video clock has to follow the master */
    if (get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER) {
        double sync_threshold;

        diff = get_clock(&is->vidclk) - get_master_clock(is);

        /* the nominal delay itself feeds into the tolerance window */
        sync_threshold = FFMAX(AV_SYNC_THRESHOLD_MIN, FFMIN(AV_SYNC_THRESHOLD_MAX, delay));

        if (!isnan(diff) && fabs(diff) < AV_NOSYNC_THRESHOLD) {
            if (diff <= -sync_threshold) {
                /* video is late: shorten the delay */
                delay = FFMAX(0, delay + diff);
            } else if (diff >= sync_threshold) {
                /* video is early: lengthen the delay */
                if (delay > AV_SYNC_FRAMEDUP_THRESHOLD)
                    delay = delay + diff;
                else
                    delay = 2 * delay;
            }
        }
    }

    if (ffp) {
        ffp->stat.avdelay = delay;
        ffp->stat.avdiff  = diff;
    }
    return delay;
}

The method of updating video pts is mainly to reset the clock and synchronize the slave clock:

/*
 * Record `pts` as the current video clock value (tagged with `serial`) and
 * then sync the external clock to the video clock.
 *
 * `pos` is accepted but not used.
 */
static void update_video_pts(VideoState *is, double pts, int64_t pos, int serial) {
    (void)pos;

    set_clock(&is->vidclk, pts, serial);
    sync_clock_to_slave(&is->extclk, &is->vidclk);
}

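The core video playback code is in the video_refresh() method. If the current time has not yet reached frame_timer + delay, it is too early for the next frame, so the current frame is simply displayed again. Otherwise the video clock is updated; and if frame dropping is enabled (always when framedrop > 0, or only when the master clock is not the video clock for any other non-zero framedrop) and the current time is already past frame_timer + duration, the current video frame is discarded and the next frame is taken: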

/*
 * Periodic video refresh step (excerpt: part of the body is elided as
 * "......").
 *
 * Decides whether the frame at the head of the picture queue is due, updates
 * the video clock when it is, optionally drops late frames, and displays the
 * picture. *remaining_time is lowered to the time left until the next frame
 * is due when that is smaller than its current value.
 *
 * NOTE(review): `is`, `ffp`, `vp`, `lastvp`, `last_duration`, `duration`,
 * `delay` and `time` are not declared in the visible lines; they presumably
 * come from the elided section - confirm against the full source.
 */
static void video_refresh(FFPlayer *opaque, double *remaining_time)
{
    // When the external clock is master on a realtime stream and playback is
    // not paused, adapt the external clock speed to the packet queues
    if (!is->paused && get_master_sync_type(is) == AV_SYNC_EXTERNAL_CLOCK && is->realtime)
        check_external_clock_speed(is);
    ......
    if (is->video_st) {
retry:
        if (frame_queue_nb_remaining(&is->pictq) == 0) {
            // nothing to do, no picture to display in the queue
        } else {
            /* compute nominal last_duration */
            last_duration = vp_duration(is, lastvp, vp);
            // Adjust the nominal duration according to the A/V sync state
            delay = compute_target_delay(ffp, last_duration, is);
            time= av_gettime_relative()/1000000.0;
            // (Re)initialise frame_timer if it is unset or lies in the future
            if (isnan(is->frame_timer) || time < is->frame_timer)
                is->frame_timer = time;
            // Not yet time for the next frame: lower *remaining_time to the wait
            // that is left and go to display without advancing the queue
            if (time < is->frame_timer + delay) {
                *remaining_time = FFMIN(is->frame_timer + delay - time, *remaining_time);
                goto display;
            }

            // The frame is due: advance the timer by the delay; if it still
            // trails the current time by more than AV_SYNC_THRESHOLD_MAX,
            // snap it to the current time
            is->frame_timer += delay;
            if (delay > 0 && time - is->frame_timer > AV_SYNC_THRESHOLD_MAX)
                is->frame_timer = time;
            // Update the video clock (and, through it, the external clock)
            // under the picture-queue mutex; frames without a pts are skipped
            SDL_LockMutex(is->pictq.mutex);
            if (!isnan(vp->pts))
                update_video_pts(is, vp->pts, vp->pos, vp->serial);
            SDL_UnlockMutex(is->pictq.mutex);
         
            if (frame_queue_nb_remaining(&is->pictq) > 1) {
                Frame *nextvp = frame_queue_peek_next(&is->pictq);
                duration = vp_duration(is, vp, nextvp);
                // Frame drop: not while stepping; enabled when framedrop > 0, or
                // when framedrop is any other non-zero value and video is not the
                // master clock. If the current time is already past
                // frame_timer + duration, discard this frame and retry
                if(!is->step && (ffp->framedrop > 0 || (ffp->framedrop && get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER)) && time > is->frame_timer + duration) {
                    frame_queue_next(&is->pictq);
                    goto retry;
                }
            }
            
            // Advance the queue and request a redraw
            frame_queue_next(&is->pictq);
            is->force_refresh = 1;
        }
display:
        /* display picture */
        // Only when display is enabled, a refresh was requested, the show mode
        // is video and pictq.rindex_shown is set
        if (!ffp->display_disable && is->force_refresh && is->show_mode == SHOW_MODE_VIDEO && is->pictq.rindex_shown)
            video_display2(ffp);
    }
    is->force_refresh = 0;
}

If there is dual track audio playback, you need to select one of the audio clocks as the master clock. For example, in the scene where the original sound and the accompaniment play simultaneously, the audio clock of the original sound is selected as the master clock, and the accompaniment is synchronized with the original sound.

Added by Daniel0 on Sat, 20 Nov 2021 23:41:58 +0200