tien_nemo

nếu chữ mảnh tăng đậm hơn

......@@ -143,8 +143,22 @@ for res in results:
})
# ==== Detect table ====
table_info = detect_tables(image_path, ocr_data_list)
table_info = detect_tables(image_path)
# Attach OCR text to every detected table cell. An OCR item is assigned to a
# cell only when its bounding box lies entirely within the cell rectangle
# (boxes touching the cell border still count as inside).
for table in table_info:
    for row in table["cells"]:  # each row is a list of cell dicts
        for cell in row:
            left, top, right, bottom = cell["cell"]
            matched = []
            for entry in ocr_data_list:
                ex1, ey1, ex2, ey2 = entry["bbox"]
                fully_inside = (
                    ex1 >= left
                    and ey1 >= top
                    and ex2 <= right
                    and ey2 <= bottom
                )
                if not fully_inside:
                    continue
                matched.append(entry["text"])
            # Write the result back onto the original cell dict: the raw list
            # of fragments and a single space-joined string.
            cell["texts"] = matched
            cell["text"] = " ".join(matched)
# ==== Build JSON ====
final_json = {
"ocr_data": ocr_data_list,
......
......@@ -2,7 +2,7 @@ import cv2
import numpy as np
import os
def detect_tables(image_path, ocr_data_list):
def detect_tables(image_path):
img = cv2.imread(image_path)
if img is None:
raise FileNotFoundError(f"Không đọc được ảnh: {image_path}")
......