nếu chữ mảnh tăng đậm hơn

tien_nemo
Commit 6d4d3dd1697eaa9cfd60bb65f8a27cbabd20d8e2 6d4d3dd1 1 parent 7dd2c172
Showing 2 changed files with 17 additions and 3 deletions
app/Services/OCR/extrac_table.py
app/Services/OCR/table_detector.py
--- a/app/Services/OCR/extrac_table.py
View file @6d4d3dd
+++ b/app/Services/OCR/extrac_table.py
View file @6d4d3dd
@@ -143,8 +143,22 @@ for res in results:
         })
 
 # ==== Detect table ====
- table_info = detect_tables(image_path, ocr_data_list)
- 
+ table_info = detect_tables(image_path)
+ 
+ for table in table_info:
+     for row in table["cells"]:  # each row is a list of cell dicts
+         for cell in row:
+             x1, y1, x2, y2 = cell["cell"]  # presumably (left, top, right, bottom) - TODO confirm against detect_tables
+             cell_texts = []
+ 
+             for item in ocr_data_list:
+                 bx1, by1, bx2, by2 = item["bbox"]
+                 if bx1 >= x1 and by1 >= y1 and bx2 <= x2 and by2 <= y2:
+                     cell_texts.append(item["text"])  # bbox is within the cell bounds on all four sides
+ 
+             # store the collected texts on the original cell dict (mutated in place)
+             cell["texts"] = cell_texts
+             cell["text"] = " ".join(cell_texts)
 # ==== Build JSON ====
 final_json = {
     "ocr_data": ocr_data_list,
--- a/app/Services/OCR/table_detector.py
View file @6d4d3dd
+++ b/app/Services/OCR/table_detector.py
View file @6d4d3dd
@@ -2,7 +2,7 @@ import cv2
 import numpy as np
 import os
 
- def detect_tables(image_path, ocr_data_list):
+ def detect_tables(image_path):
     img = cv2.imread(image_path)
     if img is None:
         raise FileNotFoundError(f"Không đọc được ảnh: {image_path}")