tien_nemo

nếu chữ mảnh thì tăng đậm hơn

...@@ -143,8 +143,22 @@ for res in results: ...@@ -143,8 +143,22 @@ for res in results:
143 }) 143 })
144 144
145 # ==== Detect table ==== 145 # ==== Detect table ====
146 -table_info = detect_tables(image_path, ocr_data_list) 146 +table_info = detect_tables(image_path)
147 - 147 +
148 +for table in table_info:
149 + for row in table["cells"]: # row là list các cell dict
150 + for cell in row:
151 + x1, y1, x2, y2 = cell["cell"]
152 + cell_texts = []
153 +
154 + for item in ocr_data_list:
155 + bx1, by1, bx2, by2 = item["bbox"]
156 + if bx1 >= x1 and by1 >= y1 and bx2 <= x2 and by2 <= y2:
157 + cell_texts.append(item["text"])
158 +
159 + # thêm vào cell gốc
160 + cell["texts"] = cell_texts
161 + cell["text"] = " ".join(cell_texts)
148 # ==== Build JSON ==== 162 # ==== Build JSON ====
149 final_json = { 163 final_json = {
150 "ocr_data": ocr_data_list, 164 "ocr_data": ocr_data_list,
......
...@@ -2,7 +2,7 @@ import cv2 ...@@ -2,7 +2,7 @@ import cv2
2 import numpy as np 2 import numpy as np
3 import os 3 import os
4 4
5 -def detect_tables(image_path, ocr_data_list): 5 +def detect_tables(image_path):
6 img = cv2.imread(image_path) 6 img = cv2.imread(image_path)
7 if img is None: 7 if img is None:
8 raise FileNotFoundError(f"Không đọc được ảnh: {image_path}") 8 raise FileNotFoundError(f"Không đọc được ảnh: {image_path}")
......