nếu chữ mảnh tăng đậm hơn

tien_nemo
Commit 228013298caf59cb60db3d42f61f404815fd9533 22801329 1 parent 1d84b308
Showing 1 changed file with 86 additions and 4 deletions
app/Services/OCR/extrac_table.py
--- a/app/Services/OCR/extrac_table.py
View file @2280132
+++ b/app/Services/OCR/extrac_table.py
View file @2280132
@@ -7,17 +7,18 @@ import json
 from pathlib import Path
 import cv2
 from table_detector import detect_tables
+from PIL import Image, ImageEnhance
 # ==== Config ====
 BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
-PDF_NAME = 'aaaa'
+# PDF_NAME = 'aaaa'
 # PDF path
-pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "fax.pdf"
+pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "Iwasaki_1.pdf"
 # Output folder
 output_folder = Path(BASE_DIR) / "public" / "image"
-#PDF_NAME = pdf_path.stem  # Get the stem of the PDF file
+PDF_NAME = pdf_path.stem  # Get the stem of the PDF file
 #print(PDF_NAME)
 os.makedirs(output_folder, exist_ok=True)
@@ -39,8 +40,89 @@ pages[0].save(image_path, "JPEG")
 # ==== Run OCR ====
 image_np = np.array(pages[0])
-results = ocr.predict(image_np)
+def estimate_text_ratio(gray, block_size=256):
+    """Return the median per-tile share of pixels that Otsu leaves at or below threshold."""
+    rows, cols = gray.shape
+    tiles = (
+        gray[top:top + block_size, left:left + block_size]
+        for top in range(0, rows, block_size)
+        for left in range(0, cols, block_size)
+    )
+    shares = []
+    for tile in tiles:
+        if tile.size:  # nothing to measure in an empty tile
+            _, otsu = cv2.threshold(tile, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+            # THRESH_BINARY whitens pixels above the threshold; invert to mark the rest.
+            marked = 255 - otsu
+            shares.append(np.sum(marked > 0) / marked.size)
+    # Median rather than mean so a few atypical tiles cannot dominate; no tiles -> 0.0.
+    return np.median(shares) if shares else 0.0
+
+def bolden_text(rgb_img: np.ndarray,
+                kernel_size: int = 3,
+                iterations: int = 1,
+                contrast: float = 1.5,
+                sharpness: float = 1.2) -> np.ndarray:
+    """
+    Thicken thin text strokes in an RGB image before OCR.
+
+    The image is returned unchanged when the median per-block text ratio is
+    at or above 0.02, i.e. when the text is already judged bold enough.
+
+    Args:
+        rgb_img: image array that cv2.COLOR_RGB2GRAY can convert.
+        kernel_size: side of the square dilation kernel (2 = light, 3 = stronger).
+        iterations: number of dilation passes.
+        contrast: linear gain applied to the result (>= 1.0).
+        sharpness: unsharp-mask weight of the image (>= 1.0; 1.0 = no sharpening).
+
+    Returns:
+        The input array itself, or a new 3-channel grey-valued RGB array.
+    """
+    gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)
+
+    # Decide first, so already-bold pages skip the thresholding work below.
+    threshold = 0.02
+    text_ratio = estimate_text_ratio(gray, block_size=256)
+    is_thin = text_ratio < threshold
+    print(f"text_ratio={text_ratio:.3f} -> {'Mảnh' if is_thin else 'Đậm'}")
+    if not is_thin:  # same predicate as the log line, so both agree at equality
+        return rgb_img
+
+    # Global Otsu; inverted so pixels at or below the threshold (text) are 255.
+    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
+    bold_mask = cv2.dilate(255 - binary, kernel, iterations=iterations)
+
+    # Overlay: pixels under the dilated mask become black, the rest are unchanged.
+    out_gray = 255 - np.maximum(255 - gray, bold_mask)
+    out_gray = cv2.convertScaleAbs(out_gray, alpha=contrast, beta=0)
+
+    # Unsharp mask. The blur weight is derived from `sharpness` so the two
+    # weights always sum to 1 and brightness is preserved (-0.2 at the default).
+    blur = cv2.GaussianBlur(out_gray, (0, 0), 0.8)
+    out_gray = cv2.addWeighted(out_gray, sharpness, blur, 1.0 - sharpness, 0)
+    # Back to 3-channel RGB for PaddleOCR.
+    return cv2.cvtColor(out_gray, cv2.COLOR_GRAY2RGB)
+
+preprocessed = bolden_text(
+    image_np,
+    kernel_size=3,     # 3 = stronger dilation; 2 gives a lighter effect
+    iterations=1,      # raise to 2 for bolder strokes
+    contrast=1.5,      # 1.0 = unchanged, 1.5-2.0 = clearer
+    sharpness=1.2      # >1.0 for sharper edges
+)
+
+debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg")
+cv2.imwrite(debug_path, cv2.cvtColor(preprocessed, cv2.COLOR_RGB2BGR))  # imwrite expects BGR
+print(f"[DEBUG] Preprocessed image saved to: {debug_path}")
+
+# Run OCR (make sure the image has 3 channels)
+if preprocessed.ndim == 2:
+    preprocessed = cv2.cvtColor(preprocessed, cv2.COLOR_GRAY2RGB)
+
+results = ocr.predict(preprocessed)
 # ==== Convert polygon to bbox ====
 def poly_to_bbox(poly):
     xs = [p[0] for p in poly]