nếu chữ mảnh tăng đậm hơn

tien_nemo
Commit 6d4d3dd1697eaa9cfd60bb65f8a27cbabd20d8e2 6d4d3dd1 1 parent 7dd2c172
Showing 2 changed files with 17 additions and 3 deletions
app/Services/OCR/extrac_table.py
app/Services/OCR/table_detector.py
--- a/app/Services/OCR/extrac_table.py
View file @6d4d3dd
+++ b/app/Services/OCR/extrac_table.py
View file @6d4d3dd
@@ -143,8 +143,22 @@ for res in results:
         })
 # ==== Detect table ====
-table_info = detect_tables(image_path, ocr_data_list)
+table_info = detect_tables(image_path)
-
+
+for table in table_info:
+    for row in table["cells"]:  # row is a list of cell dicts
+        for cell in row:
+            x1, y1, x2, y2 = cell["cell"]
+            cell_texts = []
+
+            for item in ocr_data_list:
+                bx1, by1, bx2, by2 = item["bbox"]
+                if bx1 >= x1 and by1 >= y1 and bx2 <= x2 and by2 <= y2:
+                    cell_texts.append(item["text"])
+
+            # attach the matched texts to the original cell dict
+            cell["texts"] = cell_texts
+            cell["text"] = " ".join(cell_texts)
 # ==== Build JSON ====
 final_json = {
     "ocr_data": ocr_data_list,
--- a/app/Services/OCR/table_detector.py
View file @6d4d3dd
+++ b/app/Services/OCR/table_detector.py
View file @6d4d3dd
@@ -2,7 +2,7 @@ import cv2
 import numpy as np
 import os
-def detect_tables(image_path, ocr_data_list):
+def detect_tables(image_path):
     img = cv2.imread(image_path)
     if img is None:
         raise FileNotFoundError(f"Không đọc được ảnh: {image_path}")