Tip: after the article is written, the table of contents can be generated automatically. Please refer to the help document on the right for how to generate it.
preface
Tip: the project needs to process pictures captured in real time and recognize the floating-point numbers shown in them.
The idea is as follows: OpenCV (CV) is used to preprocess the image, and the processed image is then segmented into individual characters. Two segmentation methods are used here: one cuts the characters out manually by scanning the Mat data, and the other uses OpenCV's contour extraction.
1, Two segmentation ideas
- First, the image is grayed out
- Then binarize the picture
- Start picture segmentation
- First way: scan the Mat data of the picture by rows and columns. The first row from the top that contains non-zero data is the top edge, and the last row that contains non-zero data is the bottom edge. Then scan the columns from the left: the first non-zero column is the left edge, and scanning continues until a column whose sum is 0 is reached, which is the right edge. The rectangle obtained this way encloses the number to be identified.
!!!!! However, there are problems with this method. After the image is binarized, part of a digit may become disconnected, or two adjacent digits may be joined together. For example, in the binarized data of "77" there may be no gap between the two digits along the top row.
- Second way: directly use the contour functions of OpenCV (found later).
!!!!! The same problem exists.
There is a workaround: compare each segmented piece against template pictures that enumerate every possible symbol, i.e. 0-9 and ".". The template pictures are named 0Left.png - 10Left.png (10Left.png is the decimal point).
For a joined "77", you could create an extra template 11Left.png and treat a match against template 11 as "77", but enumerating such cases is too troublesome. In the end, an OCR approach was adopted (see my other article on Tesseract with C++).
2, Use steps
Source code
The commented-out code is the first segmentation method that was originally used. It also contains the code that generates the comparison (template) images: use the commented-out code in the last function to generate the pictures for 0-9 and ".". I will make detailed revisions when I have time; I've been busy recently.
The code is as follows (example):
/******************************************************** * @file : cvstract.h * @brief : Image segmentation recognition file * @details : * @author : liuyapeng5 * @date : 2021-8-11 *********************************************************/ #pragma once #include "include/opencv2/highgui.hpp" #include "include/opencv2/core.hpp" #include "include/opencv2/opencv.hpp" #include "include/opencv2/imgproc.hpp" #include "include/opencv2/imgcodecs.hpp" #include "include/opencv2/opencv_modules.hpp" #include <QMessageBox> #include <QTextCodec> #include <QDir> #include <QDebug> #include <stack> using namespace cv; static bool isDian = false; int getColSum(Mat src, int col) { int sum = 0; int height = src.rows; int width = src.cols; for (int i = 0; i < height; i++) { sum = sum + src.at <uchar>(i, col); } return sum; } int getRowSum(Mat src, int row) { int sum = 0; int height = src.rows; int width = src.cols; for (int i = 0; i < width; i++) { sum += src.at <uchar>(row, i); } return sum; } void cutTop(Mat& src, Mat& dstImg)//Cut up and down { int top, bottom; top = 0; bottom = src.rows; int i; for (i = 0; i < src.rows; i++) { int colValue = getRowSum(src, i); //cout <<i<<" th "<< colValue << endl; if (colValue > 0) { top = i; break; } } for (int j = src.rows-1; j > i; j--) { int colValue = getRowSum(src, j); //cout << i << " th " << colValue << endl; if (colValue != 0) { bottom = j; break; } } int height = bottom - top+3; Rect rect(0, top-1, src.cols, height); dstImg = src(rect).clone(); } int cutLeft(Mat& src, Mat& leftImg, Mat& rightImg)//Left and right cutting { int left, right; left = 0; right = src.cols; int i; for (i = 0; i < src.cols; i++) { int colValue = getColSum(src, i); if (colValue > 0) { left = i; //qDebug() <<i<<" th1 "<< colValue << endl; if (colValue == 510) { int Value = getColSum(src, i + 1); if (Value == 510) { Rect rect(left, 0, 2, src.rows); leftImg = src(rect).clone(); cutTop(leftImg, leftImg); if (leftImg.rows == 2 && leftImg.cols == 2) { right = left + 2; 
Rect rectRight(right, 0, src.cols - right, src.rows); rightImg = src(rectRight).clone(); isDian = true; return 0; } } } break; } } if (left == 0 && !isDian) { return 1; } isDian = false; for (; i < src.cols; i++) { int colValue = getColSum(src, i); if (colValue == 0) { right = i; //qDebug() << i << " th2 " << colValue << endl; break; } } int width = right - left; Rect rect(left, 0, width, src.rows); leftImg = src(rect).clone(); Rect rectRight(right, 0, src.cols - right, src.rows); rightImg = src(rectRight).clone(); cutTop(leftImg, leftImg); return 0; } void getPXSum(Mat& src, int& a)//Get all pixels and { threshold(src, src, 100, 255, THRESH_BINARY); a = 0; for (int i = 0; i < src.rows; i++) { for (int j = 0; j < src.cols; j++) { a += src.at <uchar>(i, j); } } } int getSubtract(Mat src, int TemplateNum) //Subtract two pictures { Mat img_result; int min = 1000000; int serieNum = 0; for (int i = 0; i < TemplateNum; i++) { QString str = QDir::currentPath() + QDir::separator() + "src" + QDir::separator() + QString("%1Left.png").arg(i); QTextCodec* code = QTextCodec::codecForName("gb18030"); std::string name = code->fromUnicode(str).data(); // Solve Chinese garbled code and input imread as string Mat Template = imread(name, COLOR_BGR2GRAY); Mat dst(src); threshold(Template, Template, 100, 255, THRESH_BINARY); /*threshold(src, dst, 100, 255, THRESH_BINARY);*/ resize(dst, dst, Size(16, 16), 0, 0, INTER_CUBIC); resize(Template, Template, Size(16, 16), 0, 0, INTER_CUBIC); //imshow(name, Template); //std::cout << "\n" << dst; //std::cout << "\n" << Template; absdiff(Template, dst, img_result); int diff = 0; getPXSum(img_result, diff); if (diff < min) { min = diff; serieNum = i; } } //std::cout << "i =========" << min << "\n"; //std::cout << "i--------------" << serieNum << " " << serieNum << "\n"; return serieNum; } //The points on each connected domain are sorted according to y from small to large void SortContourPoint(std::vector<std::vector<Point> > &inputContours) { 
std::vector<Point> tempContoursPoint; for (int i = 0; i < inputContours.size(); i++) { tempContoursPoint.clear(); //Pay attention to emptying each cycle for (int j = i+1; j < inputContours.size(); j++) { if (inputContours[i][0].x > inputContours[j][0].x) { swap(inputContours[i], inputContours[j]); } } } } //src: binary graph to be divided, with a maximum value of 255 //segMat: split each picture //Algorithm: judge the connected domain, and several connected domains will be divided into several sub images //Purpose: to segment non adhesive numbers in handwritten numeral recognition void getConnectedDomain(cv::Mat& src, std::vector<cv::Mat>& segMat)//segMat is the final result and stores each divided number { int img_row = src.rows; int img_col = src.cols; cv::Mat flag = cv::Mat::zeros(cv::Size(img_col, img_row), CV_8UC1);//Flag matrix. If it is 0, the current pixel has not been accessed for (int i = 0; i < img_row; i++) { for (int j = 0; j < img_col; j++) { if (src.ptr<uchar>(i)[j] == 255 && flag.ptr<uchar>(i)[j] == 0) { cv::Mat subMat = cv::Mat::zeros(cv::Size(img_col, img_row), CV_8UC1);//Representative subgraph std::stack<cv::Point2f> cd; cd.push(cv::Point2f(j, i)); flag.ptr<uchar>(i)[j] = 1; subMat.ptr<uchar>(i)[j] = 255; while (!cd.empty()) { cv::Point2f tmp = cd.top(); cd.pop(); cv::Point2f p[4];//Neighborhood pixels, the four neighborhoods used here p[0] = cv::Point2f(tmp.x - 1 > 0 ? tmp.x - 1 : 0, tmp.y); p[1] = cv::Point2f(tmp.x + 1 < img_col - 1 ? tmp.x + 1 : img_row - 1, tmp.y); p[2] = cv::Point2f(tmp.x, tmp.y - 1 > 0 ? tmp.y - 1 : 0); p[3] = cv::Point2f(tmp.x, tmp.y + 1 < img_row - 1 ? 
tmp.y + 1 : img_row - 1); for (int m = 0; m < 4; m++) { int x = p[m].y; int y = p[m].x; if (src.ptr<uchar>(x)[y] == 255 && flag.ptr<uchar>(x)[y] == 0)//If it is not accessed, it is stacked and marked that the point has been accessed { cd.push(p[m]); flag.ptr<uchar>(x)[y] = 1; subMat.ptr<uchar>(x)[y] = 255; } } } segMat.push_back(subMat); } } } } QString cvrun(cv::Mat src, int model) { //QString str = QDir::currentPath() + QDir::separator() + "cc.png"; //QTextCodec* code = QTextCodec::codecForName("gb18030"); //std::string name = code->fromUnicode(str). data(); // Solve Chinese garbled code and input imread as string //src = imread(name ); //Data null determination if (!src.data) { qDebug() << "error"; return 0; } cvtColor(src, src, COLOR_BGR2GRAY); //Grayscale display threshold(src, src, 160, 255, THRESH_BINARY); //Binarization //imshow("original", src); corrosion //erode(src, src, NULL ); // //imshow("corrosion _cv", src); expand //Mat element = getStructuringElement(MORPH_RECT, Size(3, 3)); //dilate(src, src, element, Point(-1, -1), // 2, // BORDER_CONSTANT); //iterations=6,6 is the number of expansions, indicating that the operation has expanded 6 times //imshow("expansion _my", src); //waitKey(0); if (model==0) { bitwise_not(src, src);//Color reversal } //std::vector<cv::Mat> segMat; // getConnectedDomain(src, segMat); std::vector<std::vector<Point>> contours; Mat hierarchy; //Cut original image findContours(src, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_NONE); //drawContours(src, contours, 2, Scalar(255, 0, 255), 1); //imshow(" g",src); waitKey(0); //std::cout << src <<"\n"; //Sort the cut data SortContourPoint(contours); std::vector<std::vector<Point>>::const_iterator iter = contours.begin(); std::vector<Mat> vecMat; int i = 0; while (iter != contours.end()) { //Get rect value Rect rc = boundingRect(*iter); //Get the data of the location of the original image Mat leftImg = src(rc).clone(); //std::cout << leftImg<<"\n"; vecMat.push_back(leftImg); //char 
nameLeft[10]; //sprintf(nameLeft, "%dLeft", i); //char nameRight[10]; //sprintf(nameRight, "%dRight", i); //i++; imshow(nameLeft, leftImg); std::cout << leftImg; //std::stringstream ss; //ss << nameLeft; //imwrite("D:\\" + ss.str() + ".png", leftImg); iter++; } QString strnum = ""; for (auto it: vecMat ) { //Compare digital data int number = getSubtract(it, 11); if (number == 10) { strnum += "."; } else { strnum += QString::number(number); } } return strnum; // imshow("origin", src); //std::cout<<src; //Mat leftImg, rightImg; //int res = cutLeft(src, leftImg, rightImg); int i = 0; //QString strnum = ""; //while (res == 0) //{ // char nameLeft[10]; // sprintf(nameLeft, "%dLeft", i); // char nameRight[10]; // sprintf(nameRight, "%dRight", i); // i++; // //imshow(nameLeft, leftImg); // std::cout << leftImg; // std::stringstream ss; // ss << nameLeft; // imwrite("D:\\" + ss.str() + ".png", leftImg); // Mat srcTmp = rightImg; // // QString str = "D:\\1Left.png"; // // QTextCodec *code = QTextCodec::codecForName("gb18030"); // // std::string name = code->fromUnicode(str). data(); // Solve Chinese garbled code and input imread as string // // Mat Template = imread(name, COLOR_BGR2GRAY); // // // resize(Template, Template, Size(Template.cols, Template.rows), 0, 0, THRESH_BINARY); // // std::cout<<"\n"<<Template.cols<< " " <<Template.rows; // // std::cout<<"\n"<<Template; // // std::cout<<"\n"<<leftImg; // int number = getSubtract(leftImg, 11); // if (number == 10) // { // strnum += "."; // } // else // { // strnum += QString::number(number); // } // // res = cutLeft(srcTmp, leftImg, rightImg); //} return strnum; }
The url used here is the data requested by the network.
summary
Tip: here is a summary of the article:
The above is what we wanted to talk about today. This article only briefly introduces how to use OpenCV for simple number recognition.