#PS: If you repost this article, please credit the source. I hold the copyright.
#PS: This is only my own understanding. If it conflicts with yours,
#please be understanding and refrain from flaming.
Background:
In a machine vision project, algorithm results or other text information are often drawn onto pictures to make the algorithm's output easier to visualize or to improve a demonstration. To put it plainly, you need to display text somewhere on the picture.
Introduction to OSD
OSD here is short for "on-screen display", i.e. content shown on top of the screen. The "screen" in this article refers to a picture, so OSD can be understood as overlaying information on a single picture.
Text inversion and font switching
Text inversion: As the name implies, the text color switches to the opposite of the background picture's color under certain conditions, for example between black and white.
Font switching: A font determines what a character looks like, for example Kaiti (regular script), cursive script, Songti, and so on.
Average Grayscale and Freetype 2
Average gray level: The average gray level is computed with OpenCV over the image region that the glyph bitmap will cover. It is used to judge the brightness of that region: if the region is bright (close to white), the text is drawn in black; if it is dark, the text is drawn in white.
freetype2: This is an open-source library for loading standard font formats; it returns the bitmaps of the characters you pass in.
Python example (the project itself uses a C++ version, which is not published here but is very similar)
I won't analyze the code in detail here; the comments in it explain the idea. It is rough, but it achieves the function I want. I strongly recommend Python for this kind of validation code: it is fast to write and easy to use.
import math

import cv2
import matplotlib.pyplot as plt
import numpy
import numpy as np
from freetype import *


def GetBGRAndGrayImg(filename):
    """Load an image and return it together with its grayscale version.

    Args:
        filename: Path of the image file to read.

    Returns:
        A tuple ``(img, img_gray)``: the image as returned by ``cv2.imread``
        (BGR channel order) and its ``cv2.COLOR_BGR2GRAY`` conversion.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``filename``.
    """
    img = cv2.imread(filename)
    # cv2.imread signals failure by returning None instead of raising;
    # fail here with a clear message rather than inside cvtColor.
    if img is None:
        raise FileNotFoundError("cannot read image file: %s" % filename)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return img, img_gray


# FreeType face shared by all helpers below; set by InitFreeType().
freetype_face = None


def InitFreeType(path):
    """Load the font file at ``path`` into the module-level FreeType face."""
    global freetype_face
    freetype_face = Face(path)


def SetFreeTypeCharPixelSize(pixel_w, pixel_h):
    """Request a glyph size in pixels from the loaded face.

    The size is an approximation: the nearest size the font provides is
    used, since a font may offer each character in several sizes.
    """
    freetype_face.set_pixel_sizes(pixel_w, pixel_h)


def SetFreeTypeCharRotate(angle):
    """Rotate subsequently loaded glyphs by ``angle`` radians.

    The transform is a 2x2 matrix in 16.16 fixed point
    (``0x10000`` represents 1.0), laid out as ``(xx, xy, yx, yy)``.
    """
    cos_fixed = int(math.cos(angle) * 0x10000)
    sin_fixed = int(math.sin(angle) * 0x10000)
    # A rotation matrix is [[cos, -sin], [sin, cos]]; the xy term must be
    # negated, otherwise the transform is not a rotation for angle != 0.
    matrix = Matrix(cos_fixed, -sin_fixed, sin_fixed, cos_fixed)
    freetype_face.set_transform(matrix, Vector(0, 0))


def GetCharMatrixFromFont(char):
    """Render ``char`` with the loaded face and return its bitmap.

    Returns:
        A tuple ``(matrix, width, rows)``. ``matrix`` has shape
        ``(rows, width)``; non-zero entries belong to the glyph and zero
        entries are background.
    """
    freetype_face.load_char(char)
    bitmap = freetype_face.glyph.bitmap
    rows, width = bitmap.rows, bitmap.width
    buffer = numpy.array(bitmap.buffer)
    if rows == 0 or width == 0:
        # Glyph without visible pixels: return an empty matrix.
        return buffer[:0].reshape(rows, width), width, rows
    # Each bitmap row occupies `pitch` buffer entries, which may be larger
    # than `width` because of padding; derive the stride from the buffer
    # length and drop the padding columns.
    stride = buffer.size // rows
    return buffer.reshape(rows, stride)[:, :width], width, rows


# The function below takes a position and a text. For every character it
# computes the average gray level of the image area that the character
# bitmap will cover, decides which color to draw with, and then replaces
# the pixels. Note: the lower boundary of the first character is used as
# the baseline for alignment.
def GetOSDImg(img, img_g, text, start_pos, interval=0):
    """Draw ``text`` onto ``img``, choosing black or white per character.

    For each character the mean gray level of the area it will cover is
    read from ``img_g``. Areas brighter than 128 get black text, all
    others get white text. Characters are bottom-aligned to the lower
    edge of the first character.

    Args:
        img: Color image indexed as ``img[y, x]``; modified in place.
        img_g: Grayscale version of ``img`` used for brightness sampling.
        text: String to draw.
        start_pos: ``(x, y)`` of the top-left corner of the first
            character.
        interval: Extra horizontal gap, in pixels, between characters.

    Returns:
        ``img`` with the text drawn on it.

    NOTE: the horizontal advance is the glyph bitmap width plus
    ``interval``, so a character with an empty bitmap advances by
    ``interval`` only.
    """
    img_h, img_w = img.shape[:2]
    cur_pos_x = start_pos[0]
    baseline_y = start_pos[1]

    for text_i, text_e in enumerate(text):
        char_array, char_width, char_height = GetCharMatrixFromFont(text_e)

        # The bottom of the first character defines the common baseline.
        if text_i == 0:
            baseline_y += char_height
        cur_pos_y = baseline_y - char_height

        # Clip the glyph rectangle to the image so that text running off
        # an edge is cut instead of raising IndexError or wrapping around
        # through negative indices.
        x0, x1 = max(cur_pos_x, 0), min(cur_pos_x + char_width, img_w)
        y0, y1 = max(cur_pos_y, 0), min(cur_pos_y + char_height, img_h)

        if x1 > x0 and y1 > y0:
            # Sample brightness exactly where the glyph is drawn
            # (rows are y, columns are x).
            region = img_g[y0:y1, x0:x1]
            is_bright = region.size > 0 and region.mean() > 128
            # Black and white are identical in BGR and RGB order.
            color = [0, 0, 0] if is_bright else [255, 255, 255]

            glyph = np.asarray(char_array)[
                y0 - cur_pos_y:y1 - cur_pos_y,
                x0 - cur_pos_x:x1 - cur_pos_x,
            ]
            # Only non-zero bitmap pixels belong to the glyph.
            img[y0:y1, x0:x1][glyph != 0] = color

        # Position of the next character.
        cur_pos_x += char_width + interval

    return img


if __name__ == "__main__":
    img_t, img_g_t = GetBGRAndGrayImg("test.jpg")
    img = cv2.resize(img_t, (352, 288), interpolation=cv2.INTER_AREA)
    img_g = cv2.resize(img_g_t, (352, 288), interpolation=cv2.INTER_AREA)

    InitFreeType('mmm.ttf')
    # SetFreeTypeCharPixelSize(10, 10)
    freetype_face.set_char_size(5*64, 0, 300, 0)
    SetFreeTypeCharRotate(0)

    osd_img = GetOSDImg(img, img_g, "km/habcdefg How are you?", np.array([50, 50]), 3)
    # plt.imshow(osd_img)
    # osd_img = osd_img.reshape(288, 352, 3)[:, :, (2, 1, 0)]
    cv2.imshow('osd', osd_img)
    cv2.waitKey(1)
    # plt.imshow(osd_img)
    plt.xticks([]), plt.yticks([])
    plt.show()

# Disabled earlier draft, kept for reference.
#
# # First pass to compute bbox
# width, height, baseline = 0, 0, 0
# previous = 0
# for i, c in enumerate(text):
#     face.load_char(c)
#     bitmap = slot.bitmap
#     height = max(height,
#                  bitmap.rows + max(0,-(slot.bitmap_top-bitmap.rows)))
#     baseline = max(baseline, max(0,-(slot.bitmap_top-bitmap.rows)))
#     kerning = face.get_kerning(previous, c)
#     width += (slot.advance.x >> 6) + (kerning.x >> 6)
#     previous = c
#
# Z = numpy.zeros((height,37), dtype=numpy.ubyte)
# print(Z.shape)
# # Second pass for actual rendering
# x, y = 0, 0
# previous = 0
# for c in text:
#     face.load_char(c)
#     bitmap = slot.bitmap
#     top = slot.bitmap_top
#     left = slot.bitmap_left
#     w,h = bitmap.width, bitmap.rows
#     y = height-baseline-top
#     kerning = face.get_kerning(previous, c)
#     x += (kerning.x >> 6)
#     print(x, y ,h, w)
#     Z[y:y+h,x:x+w] += numpy.array(bitmap.buffer, dtype='ubyte').reshape(h,w)
#     x += (slot.advance.x >> 6)
#     previous = c
# print(Z.shape)
# img = cv2.imread("test.jpg")
# img_g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#
# array = np.array(img)
# #RGB
# array = array.reshape(520, 520, 3)[:, :, (2, 1, 0)]
# # print(array)
#
# array_g = np.array(img_g)
# array_g = array_g.reshape(520, 520)
# # print(array_g.shape)
# x = 250
# y = 250
#
# front_matrix = array_g[x: x + Z.shape[0], y: y+Z.shape[1] ]
# front_matrix_mean = front_matrix.mean()
# print(front_matrix_mean)
# for h, h_e in enumerate(Z):
#     for w, w_e in enumerate(h_e):
#         if w_e == 0:
#             continue
#         if front_matrix_mean > 128:
#             #R
#             array[ x + w, y + h , 0] = 0
#             #G
#             array[ x + w, y + h, 1 ] = 0
#             #B
#             array[ x + w, y + h , 2] = 0
#         else:
#             #R
#             array[ x + w, y + h , 0] = 255
#             #G
#             array[ x + w, y + h, 1 ] = 255
#             #B
#             array[ x + w, y + h , 2] = 255
#
#
# plt.figure(figsize=(10, 10*Z.shape[0]/float(Z.shape[1])))
# plt.imshow(array)
# plt.xticks([]), plt.yticks([])
# plt.show()
Result
Actual test results:
When the font file opens:
#PS: Please respect original work and refrain from flaming.
#PS: If you repost this article, please credit the source. I hold the copyright.
Please leave a message if you have any questions and I will reply as soon as I see them