nếu chữ mảnh tăng đậm hơn

tien_nemo
Commit 228013298caf59cb60db3d42f61f404815fd9533 22801329 1 parent 1d84b308
Showing 1 changed file with 86 additions and 4 deletions
app/Services/OCR/extrac_table.py
--- a/app/Services/OCR/extrac_table.py
View file @2280132
+++ b/app/Services/OCR/extrac_table.py
View file @2280132
@@ -7,17 +7,18 @@ import json
 from pathlib import Path
 import cv2
 from table_detector import detect_tables
+ from PIL import Image, ImageEnhance
 
 # ==== Config ====
 BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
- PDF_NAME = 'aaaa'
+ # PDF_NAME = 'aaaa'
 
 # PDF path
- pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "fax.pdf"
+ pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "Iwasaki_1.pdf"
 # Output folder
 output_folder = Path(BASE_DIR) / "public" / "image"
 
- #PDF_NAME = pdf_path.stem  # Get the stem of the PDF file
+ PDF_NAME = pdf_path.stem  # Get the stem of the PDF file
 #print(PDF_NAME)
 
 os.makedirs(output_folder, exist_ok=True)
@@ -39,8 +40,89 @@ pages[0].save(image_path, "JPEG")
 
 # ==== Run OCR ====
 image_np = np.array(pages[0])
- results = ocr.predict(image_np)
 
+ def estimate_text_ratio(gray, block_size=256):
+     """Return the median fraction of dark ("text") pixels per tile of ``gray``.
+ 
+     The image is cut into ``block_size`` x ``block_size`` tiles (edge tiles
+     may be smaller).  Each tile is binarised independently with Otsu's
+     method, and the share of pixels that fall at or below the tile's
+     threshold is recorded.  The median over all tiles is returned so that a
+     few unusually dark or unusually empty tiles do not dominate the estimate.
+ 
+     Args:
+         gray: 2-D image array; assumed to be 8-bit single-channel, which is
+             what ``cv2.threshold`` with ``THRESH_OTSU`` accepts.
+         block_size: tile edge length in pixels; must be >= 1.
+ 
+     Returns:
+         The median ratio as a plain ``float``; ``0.0`` when the image has no
+         pixels.
+ 
+     Raises:
+         ValueError: if ``block_size`` is smaller than 1.
+     """
+     if block_size < 1:
+         raise ValueError("block_size must be a positive integer")
+ 
+     h, w = gray.shape
+     ratios = []
+     for y in range(0, h, block_size):
+         for x in range(0, w, block_size):
+             # y < h and x < w always hold here, so the tile is never empty.
+             block = gray[y:y + block_size, x:x + block_size]
+             _, binary = cv2.threshold(block, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+             # THRESH_BINARY maps pixels above the threshold to 255 and the
+             # rest to 0, so the zeros are the dark (text) pixels.
+             # NOTE(review): Otsu always splits a tile into two classes, so a
+             # tile without text can still report a non-zero ratio - confirm
+             # this is acceptable for noisy scans.
+             ratios.append(np.count_nonzero(binary == 0) / binary.size)
+ 
+     if not ratios:
+         return 0.0
+     # Median rather than mean, to stay robust against outlier tiles.
+     return float(np.median(ratios))
+ 
+ def bolden_text(rgb_img: np.ndarray,
+                 kernel_size: int = 3,
+                 iterations: int = 1,
+                 contrast: float = 1.5,
+                 sharpness: float = 1.2,
+                 thin_threshold: float = 0.02) -> np.ndarray:
+     """Thicken thin text strokes in an RGB image before it is passed to OCR.
+ 
+     The median per-tile text ratio of the image is estimated first.  When it
+     is at or above ``thin_threshold`` the text is considered bold enough and
+     ``rgb_img`` is returned unchanged.  Otherwise the dark strokes are
+     dilated, then a contrast gain and an unsharp mask are applied.
+ 
+     Args:
+         rgb_img: input image in RGB channel order.
+         kernel_size: edge length of the square dilation kernel
+             (2 = light, 3 = stronger).
+         iterations: number of dilation passes.
+         contrast: multiplicative gain applied to the grayscale result
+             (1.0 = unchanged).  NOTE(review): this is a pure gain with no
+             offset, so mid-gray pixels get brighter and saturate at 255 -
+             confirm that is the intended "contrast" behaviour.
+         sharpness: weight of the image in the unsharp mask (1.0 = no
+             sharpening, > 1.0 = sharper).
+         thin_threshold: text ratio below which the text is treated as thin.
+ 
+     Returns:
+         ``rgb_img`` itself when no boldening is needed, otherwise a new
+         3-channel image whose channels all hold the processed grayscale.
+     """
+     gray = cv2.cvtColor(rgb_img, cv2.COLOR_RGB2GRAY)
+ 
+     # Decide once whether the text is thin, so the log line and the early
+     # return can never disagree (they previously did at exact equality).
+     text_ratio = estimate_text_ratio(gray, block_size=256)
+     is_thin = text_ratio < thin_threshold
+     label = 'Mảnh' if is_thin else 'Đậm'
+     print(f"text_ratio={text_ratio:.3f} -> {label}")
+ 
+     if not is_thin:
+         return rgb_img
+ 
+     # Global Otsu binarisation, only needed once we know we will dilate.
+     _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+ 
+     # Text mask (text = 255), grown by dilation to thicken the strokes.
+     text_mask = 255 - binary
+     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
+     bold_mask = cv2.dilate(text_mask, kernel, iterations=iterations)
+ 
+     # Overlay on the original gray image: work in inverted space so that
+     # "darker" means "larger", take the per-pixel maximum, then invert back.
+     inv_gray_boost = np.maximum(255 - gray, bold_mask)
+     out_gray = 255 - inv_gray_boost
+ 
+     # Linear gain (alpha * pixel), saturated to 8 bit.
+     out_gray = cv2.convertScaleAbs(out_gray, alpha=contrast, beta=0)
+ 
+     # Unsharp mask.  The two weights sum to 1 so overall brightness is kept
+     # for every ``sharpness`` value; the former fixed -0.2 blur weight was
+     # only balanced for sharpness=1.2 and darkened the image at 1.0.
+     blur = cv2.GaussianBlur(out_gray, (0, 0), 0.8)
+     out_gray = cv2.addWeighted(out_gray, sharpness, blur, 1.0 - sharpness, 0)
+ 
+     # PaddleOCR is fed a 3-channel image, so expand gray back to RGB.
+     return cv2.cvtColor(out_gray, cv2.COLOR_GRAY2RGB)
+ 
+ # Thicken thin text before OCR; bolden_text returns the image unchanged
+ # when it judges the text to be bold enough already.
+ preprocessed = bolden_text(
+     image_np,
+     kernel_size=3,     # dilation kernel size; larger = thicker strokes
+     iterations=1,      # raise to 2 for bolder text
+     contrast=1.5,      # 1.0 = unchanged, 1.5-2.0 = clearer
+     sharpness=1.2      # > 1.0 for a sharper result
+ )
+ 
+ # Save the preprocessed image for visual inspection.  cv2.imwrite expects
+ # BGR channel order, hence the RGB -> BGR conversion.
+ debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg")
+ cv2.imwrite(debug_path, cv2.cvtColor(preprocessed, cv2.COLOR_RGB2BGR))
+ print(f"[DEBUG] Preprocessed image saved to: {debug_path}")
+ 
+ # Run OCR (make sure the image has 3 channels).
+ # NOTE(review): the RGB2BGR conversion above would already fail on a 2-D
+ # image, so this guard only matters if that debug write is removed.
+ if preprocessed.ndim == 2:
+     preprocessed = cv2.cvtColor(preprocessed, cv2.COLOR_GRAY2RGB)
+ 
+ results = ocr.predict(preprocessed)
 # ==== Convert polygon to bbox ====
 def poly_to_bbox(poly):
     xs = [p[0] for p in poly]