Nếu chữ mảnh thì tăng đậm hơn

tien_nemo
Commit 7dd2c172f3df912a68de517a1057e1f5cbfb3178 7dd2c172 1 parent 22801329
Showing 2 changed files with 30 additions and 20 deletions
app/Services/OCR/extrac_table.py
app/Services/OCR/table_detector.py
--- a/app/Services/OCR/extrac_table.py
View file @7dd2c17
+++ b/app/Services/OCR/extrac_table.py
View file @7dd2c17
@@ -14,7 +14,7 @@ BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "
 # PDF_NAME = 'aaaa'
 
 # PDF path
- pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "Iwasaki_1.pdf"
+ pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "3.pdf"
 # Output folder
 output_folder = Path(BASE_DIR) / "public" / "image"
 
@@ -104,6 +104,9 @@ def bolden_text(rgb_img: np.ndarray,
 
     # Trả về RGB cho PaddleOCR
     out_rgb = cv2.cvtColor(out_gray, cv2.COLOR_GRAY2RGB)
+     debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg")
+     cv2.imwrite(debug_path, cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR))
+     print(f"[DEBUG] Preprocessed image saved to: {debug_path}")
     return out_rgb
 
 preprocessed = bolden_text(
@@ -114,9 +117,7 @@ preprocessed = bolden_text(
     sharpness=1.2      # >1.0 để nét hơn
 )
 
- debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg")
- cv2.imwrite(debug_path, cv2.cvtColor(preprocessed, cv2.COLOR_RGB2BGR))
- print(f"[DEBUG] Preprocessed image saved to: {debug_path}")
+ 
 
 # Gọi OCR (đảm bảo 3 kênh)
 if preprocessed.ndim == 2:
@@ -142,7 +143,7 @@ for res in results:
         })
 
 # ==== Detect table ====
- table_info = detect_tables(image_path)
+ table_info = detect_tables(image_path, ocr_data_list)
 
 # ==== Build JSON ====
 final_json = {
--- a/app/Services/OCR/table_detector.py
View file @7dd2c17
+++ b/app/Services/OCR/table_detector.py
View file @7dd2c17
@@ -2,7 +2,7 @@ import cv2
 import numpy as np
 import os
 
- def detect_tables(image_path):
+ def detect_tables(image_path, ocr_data_list):
     img = cv2.imread(image_path)
     if img is None:
         raise FileNotFoundError(f"Không đọc được ảnh: {image_path}")
@@ -25,7 +25,7 @@ def detect_tables(image_path):
                 ys_candidates.append(y_mid)
                 line_segments.append((x1, x2, y_mid))
 
-     # gom nhóm các y
+     # gom nhóm y
     ys, tol_y = [], 10
     for y in sorted(ys_candidates):
         if not ys or abs(y - ys[-1]) > tol_y:
@@ -43,7 +43,7 @@ def detect_tables(image_path):
             if abs(x1 - x2) <= 3:
                 xs.append(int(round((x1 + x2) / 2)))
 
-     # gom nhóm cột
+     # gom nhóm x
     x_pos, tol_v = [], 10
     for v in sorted(xs):
         if not x_pos or v - x_pos[-1] > tol_v:
@@ -52,26 +52,35 @@ def detect_tables(image_path):
     total_cols = max(0, len(x_pos) - 1)
 
     tables = []
-     if len(ys) >= 3 and line_segments:
+     if total_rows > 0 and total_cols > 0:
         y_min, y_max = ys[0], ys[-1]
-         min_x = min(seg[0] for seg in line_segments)
-         max_x = max(seg[1] for seg in line_segments)
-         table_box = (min_x, y_min, max_x, y_max)
+         x_min, x_max = x_pos[0], x_pos[-1]
+         table_box = (x_min, y_min, x_max, y_max)
 
-         rows = []
-         for i in range(len(ys) - 1):
-             row_box = (min_x, ys[i], max_x, ys[i+1])
-             rows.append({"row": tuple(int(v) for v in row_box)})
-             cv2.rectangle(img, (row_box[0], row_box[1]), (row_box[2], row_box[3]), (0, 255, 255), 2)
+         # build cells
+         rows_data = []
+         for i in range(total_rows):
+             row_cells = []
+             for j in range(total_cols):
+                 cell_box = (x_pos[j], ys[i], x_pos[j+1], ys[i+1])
+                 row_cells.append({
+                     "cell": cell_box,
+                     "row_idx": i,
+                     "col_idx": j
+                 })
+                 # Vẽ ô
+                 cv2.rectangle(img, (cell_box[0], cell_box[1]), (cell_box[2], cell_box[3]), (0, 255, 255), 1)
+             rows_data.append(row_cells)
 
         tables.append({
             "total_rows": int(total_rows),
             "total_cols": int(total_cols),
-             "table_box": tuple(int(v) for v in table_box),
-             "rows_box": rows
+             "table_box": table_box,
+             "cells": rows_data
         })
 
-         cv2.rectangle(img, (min_x, y_min), (max_x, y_max), (255, 0, 0), 3)
+         # vẽ viền bảng
+         cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
 
     debug_path = os.path.splitext(image_path)[0] + "_debug.jpg"
     cv2.imwrite(debug_path, img)