# testOCR.py
# Author: IRON xiang
import os

import cv2
import pytesseract
from pytesseract import Output
import numpy as np
def rename_with_overwrite(src, dst):
    """Move *src* to *dst*, replacing *dst* if it already exists.

    Uses ``os.replace`` so the overwrite is a single operation: an existing
    *dst* is never deleted unless *src* is actually moved into its place
    (a separate remove-then-rename would lose *dst* if the rename failed,
    e.g. because *src* does not exist).

    Args:
        src: Path of the file to move.
        dst: Destination path; an existing file there is overwritten.

    Raises:
        OSError: If the move fails (for example ``FileNotFoundError`` when
            *src* is missing). *dst* is left untouched in that case.
    """
    os.replace(src, dst)

def correct_skew(image):
    """Rotate *image* by a quarter-turn multiple based on Tesseract OSD.

    Runs Tesseract's orientation-and-script detection on the image, reads
    the reported ``rotate`` value and, if it falls into one of the 90/180/270
    degree buckets, applies the matching ``cv2.rotate`` operation.

    Args:
        image: Image passed to ``pytesseract.image_to_osd`` and
            ``cv2.rotate``.

    Returns:
        A ``(rotate_flag, rotated)`` tuple. ``rotate_flag`` is the
        ``cv2.ROTATE_*`` constant that was applied, or ``None`` when no
        rotation was performed; ``rotated`` is the rotated image, or the
        original *image* object when no rotation was performed.
    """
    # Use Tesseract's OSD mode to detect the text orientation.
    osd = pytesseract.image_to_osd(image, output_type=Output.DICT, lang='chi_sim')
    print(osd)
    rotate_angle = osd['rotate']
    print(rotate_angle)

    # Snap the reported angle to 0, 90, 180 or 270 degrees and translate it
    # into the corresponding cv2 rotation flag.
    if 45 <= rotate_angle < 135:
        rotate_flag = cv2.ROTATE_90_CLOCKWISE
    elif 135 <= rotate_angle < 225:
        rotate_flag = cv2.ROTATE_180
    elif 225 <= rotate_angle < 315:
        rotate_flag = cv2.ROTATE_90_COUNTERCLOCKWISE
    else:
        rotate_flag = None

    # Compare against None explicitly: cv2.ROTATE_90_CLOCKWISE is the
    # integer 0, so a plain truthiness test would skip the 90-degree case.
    if rotate_flag is None:
        return None, image
    return rotate_flag, cv2.rotate(image, rotate_flag)


def rotate_file(filename):
    """Detect the orientation of the image at *filename* and fix it in place.

    The image is decoded, passed to ``correct_skew`` and, if a rotation was
    applied, re-encoded in the format implied by the file's extension and
    moved over the original file. When no rotation is needed the file is
    left untouched, so it is not needlessly re-compressed.

    Args:
        filename: Path of the image file to correct.

    Raises:
        ValueError: If the file cannot be decoded as an image.
        OSError: If the corrected image cannot be encoded for writing.
    """
    # Read via np.fromfile + cv2.imdecode (flag -1 = load unchanged) rather
    # than cv2.imread -- presumably so that non-ASCII file names load.
    image = cv2.imdecode(np.fromfile(filename, dtype=np.uint8), -1)
    if image is None:
        raise ValueError(f"Could not decode image file: {filename!r}")

    angle, rotated = correct_skew(image)

    # correct_skew reports a cv2.ROTATE_* flag (or None), not degrees;
    # translate it so the message below states the real angle.
    degrees = {
        cv2.ROTATE_90_CLOCKWISE: 90,
        cv2.ROTATE_180: 180,
        cv2.ROTATE_90_COUNTERCLOCKWISE: 270,
    }.get(angle, 0)
    print(f"Input image was rotated by {degrees} degrees to correct for skew.")
    if angle is None:
        return

    # Encode in the destination's own format (falling back to JPEG when the
    # name has no extension) instead of always producing JPEG data.
    root, ext = os.path.splitext(filename)
    ext = ext or '.jpg'
    ok, encoded = cv2.imencode(ext, rotated)
    if not ok:
        raise OSError(f"Could not encode rotated image as {ext!r}")

    # Stage the result next to the target so the final move stays on one
    # filesystem, then cut-and-overwrite the original file.
    tmp_path = f"{root}.rotated-tmp{ext}"
    try:
        encoded.tofile(tmp_path)
        rename_with_overwrite(tmp_path, filename)
    finally:
        # Only still present if writing or moving failed part-way.
        if os.path.isfile(tmp_path):
            os.remove(tmp_path)

if __name__ == '__main__':
    # Script entry point: correct the orientation of the sample scan in place.
    sample_path = '抽样单电子版.jpg'
    rotate_file(sample_path)