^ _ ^

安装 Pytesseract

可以使用pytesseract库从图像中提取文本。pytesseract是Tesseract的Python封装；Tesseract是一款由Google赞助的开源OCR（光学字符识别）引擎。

Step1 : 使用pip安装pytesseract模块

pip install pytesseract

Step2 : 下载Tesseract-OCR
Tesseract-OCR安装程序下载链接：Tesseract-OCR

将Tesseract-OCR安装到自定义目录（例如我的电脑是D:\Tesseract-OCR）

Step3 : 修改pytesseract.py文件
找到python下pytesseract模块的路径（例如我的电脑是D:\python36\Lib\site-packages\pytesseract），用编辑器打开该文件夹下的pytesseract.py文件，找到tesseract_cmd，将它改为你刚刚安装的tesseract可执行文件的完整路径（例如 tesseract_cmd = r'D:\Tesseract-OCR\tesseract.exe'）。

Step4 : 修改环境变量
在用户变量或系统变量的Path中添加Tesseract-OCR路径下的tessdata

Step5 : 测试

from PIL import Image
import pytesseract
if __name__ == '__main__':
    # Smoke test: open the sample image and convert it to 3-channel RGB
    # before handing it to Tesseract.
    image = Image.open("code.jpg").convert('RGB')
    # Run OCR and echo whatever text was recognised.
    text = pytesseract.image_to_string(image)
    print(text)

参考资料

Pytesseract的安装与使用：https://www.jianshu.com/p/2db541800418

车牌识别

import cv2 as cv
import imutils
import numpy as np
import pytesseract

# 1. 预处理图像
def preprocess(img):
    """Convert a BGR image to grayscale and smooth it with a bilateral filter.

    Args:
        img: Image passed to ``cv.cvtColor`` with ``cv.COLOR_BGR2GRAY``.

    Returns:
        The filtered grayscale image.
    """
    grayscale = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # Bilateral filter arguments: diameter 13, sigmaColor 15, sigmaSpace 15.
    smoothed = cv.bilateralFilter(grayscale, 13, 15, 15)
    # Uncomment to preview: cv.imshow("gray", smoothed)
    return smoothed
    
# 2. 边缘检测
def edge_detect(img):
    """Run Canny edge detection on the given image.

    Args:
        img: Image passed to ``cv.Canny``; the script passes the filtered
            grayscale image.

    Returns:
        The edge map computed with thresholds 30 and 200.
    """
    low_threshold, high_threshold = 30, 200
    edge_map = cv.Canny(img, low_threshold, high_threshold)
    # Uncomment to preview: cv.imshow("edged", edge_map)
    return edge_map

# 3. 找到所有的轮廓，显示面积前10的轮廓
def contour_detect(img, edged):
    """Find contours in the edge map and keep the ten largest by area.

    Args:
        img: Image used as the background for the debug drawing. Only a
            copy is drawn on, so ``img`` itself is left untouched.
        edged: Edge map that is searched for contours.

    Returns:
        A list of at most ten contours, ordered by ``cv.contourArea`` from
        largest to smallest.
    """
    found = cv.findContours(edged.copy(), cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    # grab_contours picks the contour list out of findContours' return value.
    largest = list(imutils.grab_contours(found))
    largest.sort(key=cv.contourArea, reverse=True)
    largest = largest[:10]

    # Debug visualisation: outline the kept contours in red on a copy.
    preview = img.copy()
    cv.drawContours(preview, largest, -1, (0, 0, 255), 3)
    # Uncomment to preview: cv.imshow("contours", preview)
    return largest

# 4. 定位车牌
def position(img, contours):
    """Pick the first contour whose polygon approximation has four corners.

    Each contour is approximated with ``cv.approxPolyDP`` using a tolerance
    of 1.8% of its perimeter. Every four-point approximation is treated as
    a plate candidate and outlined in green on ``img`` (``img`` is modified
    in place).

    Args:
        img: Image the candidate outlines are drawn onto.
        contours: Contours to examine, in priority order.

    Returns:
        The first four-point approximation, or ``None`` (after printing
        "No contour detected") when there is no candidate.
    """
    def approximate(contour):
        # Closed-curve perimeter drives the approximation tolerance.
        perimeter = cv.arcLength(contour, True)
        return cv.approxPolyDP(contour, 0.018 * perimeter, True)

    # Keep only approximations with exactly four vertices.
    candidates = [
        polygon for polygon in map(approximate, contours) if len(polygon) == 4
    ]

    if not candidates:
        print("No contour detected")
        return None

    cv.drawContours(img, candidates, -1, (0, 255, 0), 3)
    # Uncomment to preview: cv.imshow("card position", img)
    return candidates[0]

# 5. 对图片中车牌以外的部分进行遮罩，提取出车牌区域
def extract_card(gray, card_contour):
    """Crop the grayscale image to the bounding box of the plate contour.

    The contour is filled into a blank mask; the mask is used only to find
    the extreme row/column indices covered by the contour, and the crop is
    taken from ``gray`` itself (pixels are not masked out).

    Args:
        gray: Grayscale image to crop.
        card_contour: Contour outlining the plate region.

    Returns:
        The sub-array of ``gray`` spanning the contour's bounding box,
        inclusive of its last row and column.
    """
    mask = np.zeros(gray.shape, np.uint8)
    # Thickness -1 fills the contour interior with 255.
    cv.drawContours(mask, [card_contour], 0, 255, -1)

    # np.where yields (row indices, column indices) of the filled pixels.
    rows, cols = np.where(mask == 255)
    top, bottom = np.min(rows), np.max(rows)
    left, right = np.min(cols), np.max(cols)

    plate = gray[top:bottom + 1, left:right + 1]
    # Uncomment to preview: cv.imshow("card", plate)
    return plate

# 6. 识别车牌
def ocr_card(card):
    """Display the plate crop, run Tesseract on it and print the result.

    Args:
        card: Image of the plate region.

    Returns:
        The string returned by ``pytesseract.image_to_string``.
    """
    cv.imshow("card", card)
    # Page segmentation mode 11 is Tesseract's "sparse text" mode.
    tesseract_config = '--psm 11'
    recognized = pytesseract.image_to_string(card, config=tesseract_config)
    print(recognized)
    return recognized

# 7. 将识别到的文字显示在画布上
def draw_text(text):
    """Show the recognised text in green on a black 300x100 canvas.

    ``cv.putText`` renders every character its Hershey font cannot draw
    (including newlines and the form feed Tesseract may append) as ``?``.
    Whitespace runs are therefore collapsed to single spaces and leading or
    trailing whitespace is dropped before drawing.

    Args:
        text: String to display, typically raw Tesseract output.
    """
    # Black BGR canvas, 100 rows by 300 columns.
    canvas = np.zeros((100, 300, 3), np.uint8)
    # str.split() with no argument splits on any whitespace, so "\n" and
    # "\x0c" never reach putText.
    single_line = " ".join(text.split())
    cv.putText(canvas, single_line, (50, 50), cv.FONT_HERSHEY_SIMPLEX, 1,
               (0, 255, 0), 3)
    cv.imshow("text", canvas)

if __name__ == "__main__":
    # cv.imread returns None (it does not raise) when the file is missing
    # or cannot be decoded, so fail with a clear message here instead of
    # crashing inside cv.resize.
    img = cv.imread('car1.jpeg', cv.IMREAD_COLOR)
    if img is None:
        raise SystemExit("Could not read image 'car1.jpeg'")
    img = cv.resize(img, (600, 400))
    # Uncomment to preview: cv.imshow("img", img)

    gray = preprocess(img)
    edged = edge_detect(gray)
    contours = contour_detect(img, edged)
    card_contour = position(img, contours)
    if card_contour is None:
        # position() has already printed the reason; there is nothing to
        # crop or recognise, so stop with a non-zero exit status.
        raise SystemExit(1)

    card = extract_card(gray, card_contour)
    text = ocr_card(card)
    draw_text(text)

    # Keep the windows open until a key is pressed, then close them.
    cv.waitKey(0)
    cv.destroyAllWindows()