A simple implementation of OSD (text inversion - opencv, font switching - freetype 2)

In a machine vision project, algorithm results or other text information often need to be displayed on pictures to improve the visualization of the algorithm or the effect of a demonstration. Put plainly, you need to draw text somewhere on the picture.

Introduction to OSD

OSD here is short for on-screen display, i.e. content displayed on top of the screen. The term "screen" here refers to a picture, so OSD can be understood as overlaying information on a single picture.

Text inversion and font switching

Text inversion: As the name implies, the text changes to the opposite color according to some condition (here, the background picture), for example switching between black and white.

Font switching: A font is what a character looks like, such as KaiTi (regular script), cursive script, SongTi (Song typeface) and so on.

Average Grayscale and Freetype 2

Average gray level: The average gray level is computed with opencv over the image data located where the bitmap of the corresponding character will be drawn. Its main purpose is to judge the brightness of that image data: if the region is too bright (white), the text is drawn in black; if it is too dark, the text is drawn in white.

freetype2: This is an open source library for loading standard font formats, from which you can get the bitmaps of the characters you pass in.

python instance (a c++ version is used in the project; it is not posted here, but it is very similar)

I won't analyze it here; the code is just commented to explain the idea. It is rough, but it achieves the function I want. I strongly recommend python for validation-type (prototype) code, because it is fast and easy to use.

from freetype import *

import numpy as np
import cv2
import math
import numpy
import matplotlib.pyplot as plt

# Return the BGR image and its grayscale counterpart.
def GetBGRAndGrayImg(filename):
    """Read an image file and return it with a grayscale copy.

    Args:
        filename: Path of the image file passed to ``cv2.imread``.

    Returns:
        A tuple ``(img, img_gray)``: the image as loaded by ``cv2.imread``
        and the result of converting it with ``cv2.COLOR_BGR2GRAY``.

    Raises:
        IOError: If ``cv2.imread`` could not load ``filename``.
    """
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising; fail here with a clear message rather than
        # letting cvtColor raise an obscure assertion error.
        raise IOError("cannot read image file: {!r}".format(filename))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return img, img_gray

# Shared FreeType face used by every helper in this script.
# It stays None until InitFreeType() has been called.
freetype_face = None
def InitFreeType(path):
    """Create a FreeType ``Face`` from ``path`` and store it module-wide.

    Args:
        path: Font file location handed to the ``Face`` constructor.
    """
    global freetype_face
    loaded_face = Face(path)
    freetype_face = loaded_face
# Set the pixel size of the glyphs that will be fetched from the font.
# NOTE(review): the original author states the size is approximate (the
# nearest size available in the font is used) - confirm against FreeType docs.
def SetFreeTypeCharPixelSize(pixel_w, pixel_h):
    """Request glyphs of ``pixel_w`` x ``pixel_h`` pixels on the shared face.

    Args:
        pixel_w: Requested glyph width in pixels.
        pixel_h: Requested glyph height in pixels.
    """
    face = freetype_face
    face.set_pixel_sizes(pixel_w, pixel_h)
# Set font rotation.
def SetFreeTypeCharRotate(angle):
    """Rotate glyphs loaded from the shared face by ``angle``.

    Args:
        angle: Rotation angle in radians (it is fed to ``math.cos`` and
            ``math.sin``).
    """
    # FreeType matrices use 16.16 fixed point, hence the 0x10000 scale.
    cos_fixed = int(math.cos(angle) * 0x10000)
    sin_fixed = int(math.sin(angle) * 0x10000)
    # A rotation matrix is [[cos, -sin], [sin, cos]] in (xx, xy, yx, yy)
    # order. The xy coefficient must be negated; with +sin in both
    # off-diagonal slots the transform is not a rotation.
    matrix = Matrix(cos_fixed, -sin_fixed,
                    sin_fixed, cos_fixed)
    # No translation is applied, only the rotation.
    freetype_face.set_transform(matrix, Vector(0, 0))

# Return the bitmap matrix of a character: non-zero pixels belong to the
# glyph, zero pixels are the glyph's background.
def GetCharMatrixFromFont(char):
    """Render ``char`` with the shared face and return its bitmap.

    Args:
        char: The single character to render.

    Returns:
        A tuple ``(matrix, width, rows)`` where ``matrix`` is a
        ``(rows, width)`` uint8 numpy array of the glyph bitmap, and
        ``width`` / ``rows`` are the bitmap dimensions in pixels.
    """
    # Load the glyph for this character into the face's glyph slot. Without
    # this call the slot still holds whatever glyph was loaded before (or
    # nothing at all), so the argument would be ignored.
    # freetype-py's load_char renders the bitmap by default.
    freetype_face.load_char(char)
    bitmap = freetype_face.glyph.bitmap
    rows, width = bitmap.rows, bitmap.width
    # The buffer holds rows * pitch bytes; pitch may exceed width because of
    # row padding, so reshape by pitch and then drop the padding columns.
    pitch = abs(bitmap.pitch)
    pixels = numpy.array(bitmap.buffer, dtype=numpy.uint8)
    matrix = pixels.reshape(rows, pitch)[:, :width]
    return matrix, width, rows

# Draw the text at the given position. For each character bitmap, compute the
# average gray level of the image area it will cover, decide the text color
# from it, then replace the pixels. Note: the lower boundary of the first
# character is used as the alignment line for all characters.
def GetOSDImg(img, img_g, text, start_pos, interval=0, threshold=128):
    """Overlay ``text`` on ``img``, inverting its color against the background.

    Args:
        img: Image to draw on; it is modified in place and also returned.
        img_g: Grayscale version of ``img`` used to measure brightness.
        text: String to draw, one glyph per character.
        start_pos: ``(x, y)`` of the top-left corner of the first character.
        interval: Extra horizontal gap, in pixels, between characters.
        threshold: Mean gray level above which the background counts as
            bright and the character is drawn black instead of white.

    Returns:
        ``img`` with the text drawn on it.
    """
    img_h, img_w = img.shape[:2]
    cur_pos_x = int(start_pos[0])
    baseline_y = int(start_pos[1])
    for text_i, text_e in enumerate(text):
        char_array, char_width, char_height = GetCharMatrixFromFont(text_e)

        # The bottom edge of the first character defines the alignment line.
        if text_i == 0:
            baseline_y += char_height
        cur_pos_y = baseline_y - char_height

        # Clip the glyph rectangle to the image so that drawing near the
        # border neither raises IndexError nor wraps via negative indices.
        x0 = max(cur_pos_x, 0)
        y0 = max(cur_pos_y, 0)
        x1 = min(cur_pos_x + char_width, img_w)
        y1 = min(cur_pos_y + char_height, img_h)

        # Skip empty glyphs (e.g. a space) and glyphs fully outside the
        # image; this also avoids taking the mean of an empty region.
        if x0 < x1 and y0 < y1:
            # Sample brightness over exactly the area the glyph covers.
            # numpy images are indexed [row (y), column (x)].
            gray_matrix_mean = img_g[y0:y1, x0:x1].mean()
            # Bright background -> black text, dark background -> white.
            color = 0 if gray_matrix_mean > threshold else 255
            glyph = char_array[y0 - cur_pos_y:y1 - cur_pos_y,
                               x0 - cur_pos_x:x1 - cur_pos_x]
            # Only glyph pixels (non-zero) are painted; the slice is a view,
            # so the masked assignment writes through to img.
            img[y0:y1, x0:x1][glyph != 0] = color

        # Advance to the next character position.
        # NOTE(review): the advance is the bitmap width, so zero-width
        # glyphs such as a space only move the pen by `interval`.
        cur_pos_x += char_width + interval
    return img
if __name__ == "__main__":
    import sys

    # The shared FreeType face must exist before any size or glyph call;
    # without InitFreeType, freetype_face is still None and the
    # set_char_size call below fails. The font file can be given as the
    # first command-line argument, otherwise a local "font.ttf" is used.
    font_path = sys.argv[1] if len(sys.argv) > 1 else "font.ttf"
    InitFreeType(font_path)

    img_t, img_g_t = GetBGRAndGrayImg("test.jpg")
    # cv2.resize takes the target size as (width, height).
    img = cv2.resize(img_t, (352, 288), interpolation=cv2.INTER_AREA)
    img_g = cv2.resize(img_g_t, (352, 288), interpolation=cv2.INTER_AREA)

    # SetFreeTypeCharPixelSize(10, 10)
    # Character size is in 26.6 fixed point (5 * 64 == 5 pt) at 300 dpi; a
    # zero height / vertical resolution means "same as the other value".
    freetype_face.set_char_size(5*64, 0, 300, 0)

    osd_img = GetOSDImg(img, img_g, "km/habcdefg How are you?", np.array([50, 50]), 3)
    #osd_img = osd_img.reshape(288, 352, 3)[:, :, (2, 1, 0)]

    cv2.imshow('osd', osd_img)
    # HighGUI only draws the window while its event loop runs; block until a
    # key is pressed, otherwise the script exits before anything is shown.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # plt.imshow(osd_img)

    # # First pass to compute bbox
    # width, height, baseline = 0, 0, 0
    # previous = 0
    # for i, c in enumerate(text):
    #     face.load_char(c)
    #     bitmap = slot.bitmap
    #     height = max(height,
    #                  bitmap.rows + max(0,-(slot.bitmap_top-bitmap.rows)))
    #     baseline = max(baseline, max(0,-(slot.bitmap_top-bitmap.rows)))
    #     kerning = face.get_kerning(previous, c)
    #     width += (slot.advance.x >> 6) + (kerning.x >> 6)
    #     previous = c
    # Z = numpy.zeros((height,37), dtype=numpy.ubyte)
    # print(Z.shape)
    # # Second pass for actual rendering
    # x, y = 0, 0
    # previous = 0
    # for c in text:
    #     face.load_char(c)
    #     bitmap = slot.bitmap
    #     top = slot.bitmap_top
    #     left = slot.bitmap_left
    #     w,h = bitmap.width, bitmap.rows
    #     y = height-baseline-top
    #     kerning = face.get_kerning(previous, c)
    #     x += (kerning.x >> 6)
    #     print(x, y ,h, w)
    #     Z[y:y+h,x:x+w] += numpy.array(bitmap.buffer, dtype='ubyte').reshape(h,w)
    #     x += (slot.advance.x >> 6)
    #     previous = c
    # print(Z.shape)
    # img = cv2.imread("test.jpg")
    # img_g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # array = np.array(img)
    # #RGB
    # array = array.reshape(520, 520, 3)[:, :, (2, 1, 0)]
    # print(array)
    # array_g = np.array(img_g)
    # array_g = array_g.reshape(520, 520)
    # print(array_g.shape)
    # x = 250
    # y = 250
    # front_matrix = array_g[x: x + Z.shape[0], y: y+Z.shape[1] ]
    # front_matrix_mean = front_matrix.mean()
    # print(front_matrix_mean)
    # for h, h_e in enumerate(Z):
    #     for w, w_e in enumerate(h_e):
    #         if w_e == 0:
    #             continue
    #         if front_matrix_mean > 128:
    #             #R
    #             array[ x + w, y + h , 0] = 0
    #             #G
    #             array[ x + w, y + h, 1 ] = 0
    #             #B
    #             array[ x + w, y + h , 2] = 0
    #         else:
    #             #R
    #             array[ x + w, y + h , 0] = 255
    #             #G
    #             array[ x + w, y + h, 1 ] = 255
    #             #B
    #             array[ x + w, y + h , 2] = 255
    # # plt.figure(figsize=(10, 10*Z.shape[0]/float(Z.shape[1])))
    # plt.imshow(array)
    # plt.xticks([]), plt.yticks([])
    # plt.show()


Actual test results:

When the font file opens:

