tien_nemo

fix anh dung keu 22/09

......@@ -141,20 +141,7 @@ for res in results:
"field": "",
"hideBorder": False
})
# ==== Detect table ====
if debug_file and os.path.exists(debug_file):
image_path = debug_file
table_info = detect_tables(image_path)
for table in table_info:
for row in table["cells"]: # row là list các cell dict
for cell in row:
x1, y1, x2, y2 = cell["cell"]
cell_texts = []
# Helper: compute overlap ratio of bbox against cell
def overlap_ratio(bbox, cell_box):
def overlap_ratio(bbox, cell_box):
ix1 = max(bbox[0], cell_box[0])
iy1 = max(bbox[1], cell_box[1])
ix2 = min(bbox[2], cell_box[2])
......@@ -164,14 +151,21 @@ for table in table_info:
inter = iw * ih
bbox_area = max(1, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))
return inter / float(bbox_area)
# Helper: check center inside cell
def center_inside(bbox, cell_box):
    """Return True when the center point of ``bbox`` lies within ``cell_box``.

    Both arguments are indexed as ``[x1, y1, x2, y2]``. A center that falls
    exactly on an edge of ``cell_box`` counts as inside.
    """
    cx = (bbox[0] + bbox[2]) / 2.0
    cy = (bbox[1] + bbox[3]) / 2.0
    return (cell_box[0] <= cx <= cell_box[2]
            and cell_box[1] <= cy <= cell_box[3])
# ==== Detect table ====
if debug_file and os.path.exists(debug_file):
image_path = debug_file
table_info = detect_tables(image_path)
for index, table in enumerate(table_info):
for row in table["cells"]: # row là list các cell dict
for cell in row:
x1, y1, x2, y2 = cell["cell"]
cell_texts = []
cell_box = [x1, y1, x2, y2]
for item in ocr_data_list:
bx1, by1, bx2, by2 = item["bbox"]
......@@ -179,6 +173,13 @@ for table in table_info:
# Accept if at least 30% of the bbox area overlaps the cell, or the bbox center lies inside the cell
if overlap_ratio(bbox, cell_box) >= 0.3 or center_inside(bbox, cell_box):
cell_texts.append(item["text"])
item["table"] = {
"bbox": {
"table_index": index,
"row_idx": cell["row_idx"],
"col_idx": cell["col_idx"]
}
}
# store the matched texts on the original cell
cell["texts"] = cell_texts
......