Showing
1 changed file
with
86 additions
and
4 deletions
| ... | @@ -7,17 +7,18 @@ import json | ... | @@ -7,17 +7,18 @@ import json |
| 7 | from pathlib import Path | 7 | from pathlib import Path |
| 8 | import cv2 | 8 | import cv2 |
| 9 | from table_detector import detect_tables | 9 | from table_detector import detect_tables |
| 10 | +from PIL import Image, ImageEnhance | ||
| 10 | 11 | ||
| 11 | # ==== Config ==== | 12 | # ==== Config ==== |
| 12 | BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) | 13 | BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) |
| 13 | -PDF_NAME = 'aaaa' | 14 | +# PDF_NAME = 'aaaa' |
| 14 | 15 | ||
| 15 | # PDF path | 16 | # PDF path |
| 16 | -pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "fax.pdf" | 17 | +pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "Iwasaki_1.pdf" |
| 17 | # Output folder | 18 | # Output folder |
| 18 | output_folder = Path(BASE_DIR) / "public" / "image" | 19 | output_folder = Path(BASE_DIR) / "public" / "image" |
| 19 | 20 | ||
| 20 | -#PDF_NAME = pdf_path.stem # Get the stem of the PDF file | 21 | +PDF_NAME = pdf_path.stem # Get the stem of the PDF file |
| 21 | #print(PDF_NAME) | 22 | #print(PDF_NAME) |
| 22 | 23 | ||
| 23 | os.makedirs(output_folder, exist_ok=True) | 24 | os.makedirs(output_folder, exist_ok=True) |
| ... | @@ -39,8 +40,89 @@ pages[0].save(image_path, "JPEG") | ... | @@ -39,8 +40,89 @@ pages[0].save(image_path, "JPEG") |
| 39 | 40 | ||
| 40 | # ==== Run OCR ==== | 41 | # ==== Run OCR ==== |
| 41 | image_np = np.array(pages[0]) | 42 | image_np = np.array(pages[0]) |
| 42 | -results = ocr.predict(image_np) | ||
| 43 | 43 | ||
def estimate_text_ratio(gray, block_size=256):
    """Return the median per-block fraction of dark ("text") pixels.

    The image is tiled into ``block_size`` x ``block_size`` blocks (blocks on
    the right/bottom edge may be smaller). Each block is binarized on its own
    with Otsu's threshold, and the pixels that end up on the dark side are
    counted as text. The median over all blocks is returned so that a few
    outlier blocks do not dominate the estimate.

    Args:
        gray: 2-D single-channel image array.
            NOTE(review): assumed to be uint8, which OpenCV's Otsu
            thresholding expects -- confirm against callers.
        block_size: Side length of the square tiles in pixels; must be > 0.

    Returns:
        The median ratio as a built-in ``float``; ``0.0`` when the image
        contains no pixels.

    Raises:
        ValueError: If ``gray`` is not 2-D or ``block_size`` is not positive.
    """
    if gray.ndim != 2:
        raise ValueError(f"gray must be a 2-D array, got {gray.ndim} dimensions")
    if block_size <= 0:
        # A negative step would make range() empty and silently report 0.0.
        raise ValueError(f"block_size must be positive, got {block_size}")

    h, w = gray.shape
    ratios = []
    for y in range(0, h, block_size):
        for x in range(0, w, block_size):
            block = gray[y:y + block_size, x:x + block_size]
            if block.size == 0:
                continue
            _, binary = cv2.threshold(
                block, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )
            # THRESH_BINARY maps the dark class to 0, so zeros are text pixels.
            # NOTE(review): Otsu always splits a block into two classes, so a
            # nearly blank block may still report a sizeable ratio from noise.
            ratios.append(np.count_nonzero(binary == 0) / binary.size)

    if not ratios:
        return 0.0
    # Median rather than mean to stay robust against outlier blocks; cast so
    # both return paths yield the same type.
    return float(np.median(ratios))
| 61 | + | ||
def bolden_text(rgb_img: np.ndarray,
                kernel_size: int = 3,
                iterations: int = 1,
                contrast: float = 1.5,
                sharpness: float = 1.2) -> np.ndarray:
    """Thicken thin text strokes in an RGB image before OCR.

    The page's text density is estimated with ``estimate_text_ratio``. When
    the density is above a fixed threshold the strokes are considered bold
    enough and the input is returned untouched. Otherwise the dark (text)
    pixels are dilated, merged back into the grayscale image, then a gain
    and an unsharp mask are applied.

    Args:
        rgb_img: 3-channel image in RGB order.
        kernel_size: Side of the square dilation kernel (2 = light,
            3 = stronger).
        iterations: Number of dilation passes.
        contrast: Multiplicative gain applied to the grayscale result
            (>= 1.0; 1.0 leaves intensities unchanged).
        sharpness: Unsharp-mask strength (>= 1.0; 1.0 leaves the image
            unchanged).

    Returns:
        ``rgb_img`` itself when no bolding is needed; otherwise a new
        3-channel array holding the processed grayscale image replicated
        across the channels.
    """
    gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)

    # One predicate drives both the log label and the decision so that the
    # two can never disagree (they previously did when ratio == threshold).
    threshold = 0.02
    text_ratio = estimate_text_ratio(gray, block_size=256)
    needs_bolding = text_ratio <= threshold
    print(f"text_ratio={text_ratio:.3f} -> {'Mảnh' if needs_bolding else 'Đậm'}")

    if not needs_bolding:
        return rgb_img

    # Global Otsu binarization; only computed once we know it is needed.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Text mask (text = 255), grown by dilation to thicken the strokes.
    text_mask = 255 - binary
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    bold_mask = cv2.dilate(text_mask, kernel, iterations=iterations)

    # Overlay on the original gray image: work in inverted space so that
    # "darker" is "larger", take the per-pixel maximum, then invert back.
    inv_gray = 255 - gray
    inv_gray_boost = np.maximum(inv_gray, bold_mask)
    out_gray = 255 - inv_gray_boost

    # Linear gain with saturation. With beta=0 this scales about black, so
    # light pixels clip to white while the dilated strokes (value 0) stay
    # black.
    out_gray = cv2.convertScaleAbs(out_gray, alpha=contrast, beta=0)

    # Unsharp mask. The blur weight is derived from `sharpness` so that the
    # two weights always sum to 1 and overall brightness is preserved for
    # any strength, not only for the default 1.2.
    blur = cv2.GaussianBlur(out_gray, (0, 0), 0.8)
    out_gray = cv2.addWeighted(out_gray, sharpness, blur, 1.0 - sharpness, 0)

    # Back to 3 channels for PaddleOCR.
    return cv2.cvtColor(out_gray, cv2.COLOR_GRAY2RGB)
| 108 | + | ||
# Thicken thin strokes before OCR; tune the arguments per document.
preprocessed = bolden_text(
    image_np,
    kernel_size=3,   # increase if the text is still too thin
    iterations=1,    # raise to 2 for heavier strokes
    contrast=1.5,    # 1.0 = unchanged, 1.5-2.0 = more pronounced
    sharpness=1.2,   # > 1.0 sharpens
)

# Guarantee 3 channels before anything consumes the image: both the
# RGB->BGR conversion below and the OCR call expect a 3-channel array.
if preprocessed.ndim == 2:
    preprocessed = cv2.cvtColor(preprocessed, cv2.COLOR_GRAY2RGB)

# Save the preprocessed page for visual inspection (OpenCV writes BGR).
debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg")
cv2.imwrite(debug_path, cv2.cvtColor(preprocessed, cv2.COLOR_RGB2BGR))
print(f"[DEBUG] Preprocessed image saved to: {debug_path}")

# Run OCR on the preprocessed image.
results = ocr.predict(preprocessed)
| 44 | # ==== Convert polygon to bbox ==== | 126 | # ==== Convert polygon to bbox ==== |
| 45 | def poly_to_bbox(poly): | 127 | def poly_to_bbox(poly): |
| 46 | xs = [p[0] for p in poly] | 128 | xs = [p[0] for p in poly] | ... | ... |
-
Please register or sign in to post a comment