tien_nemo

fix anh dung keu 22/09

...@@ -141,37 +141,31 @@ for res in results: ...@@ -141,37 +141,31 @@ for res in results:
141 "field": "", 141 "field": "",
142 "hideBorder": False 142 "hideBorder": False
143 }) 143 })
144 - 144 +def overlap_ratio(bbox, cell_box):
145 + ix1 = max(bbox[0], cell_box[0])
146 + iy1 = max(bbox[1], cell_box[1])
147 + ix2 = min(bbox[2], cell_box[2])
148 + iy2 = min(bbox[3], cell_box[3])
149 + iw = max(0, ix2 - ix1)
150 + ih = max(0, iy2 - iy1)
151 + inter = iw * ih
152 + bbox_area = max(1, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))
153 + return inter / float(bbox_area)
154 +def center_inside(bbox, cell_box):
155 + cx = (bbox[0] + bbox[2]) / 2.0
156 + cy = (bbox[1] + bbox[3]) / 2.0
157 + return (cx >= cell_box[0] and cx <= cell_box[2] and
158 + cy >= cell_box[1] and cy <= cell_box[3])
145 # ==== Detect table ==== 159 # ==== Detect table ====
146 if debug_file and os.path.exists(debug_file): 160 if debug_file and os.path.exists(debug_file):
147 image_path = debug_file 161 image_path = debug_file
148 table_info = detect_tables(image_path) 162 table_info = detect_tables(image_path)
149 163
150 -for table in table_info: 164 +for index, table in enumerate(table_info):
151 for row in table["cells"]: # row là list các cell dict 165 for row in table["cells"]: # row là list các cell dict
152 for cell in row: 166 for cell in row:
153 x1, y1, x2, y2 = cell["cell"] 167 x1, y1, x2, y2 = cell["cell"]
154 cell_texts = [] 168 cell_texts = []
155 -
156 - # Helper: compute overlap ratio of bbox against cell
157 - def overlap_ratio(bbox, cell_box):
158 - ix1 = max(bbox[0], cell_box[0])
159 - iy1 = max(bbox[1], cell_box[1])
160 - ix2 = min(bbox[2], cell_box[2])
161 - iy2 = min(bbox[3], cell_box[3])
162 - iw = max(0, ix2 - ix1)
163 - ih = max(0, iy2 - iy1)
164 - inter = iw * ih
165 - bbox_area = max(1, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))
166 - return inter / float(bbox_area)
167 -
168 - # Helper: check center inside cell
169 - def center_inside(bbox, cell_box):
170 - cx = (bbox[0] + bbox[2]) / 2.0
171 - cy = (bbox[1] + bbox[3]) / 2.0
172 - return (cx >= cell_box[0] and cx <= cell_box[2] and
173 - cy >= cell_box[1] and cy <= cell_box[3])
174 -
175 cell_box = [x1, y1, x2, y2] 169 cell_box = [x1, y1, x2, y2]
176 for item in ocr_data_list: 170 for item in ocr_data_list:
177 bx1, by1, bx2, by2 = item["bbox"] 171 bx1, by1, bx2, by2 = item["bbox"]
...@@ -179,6 +173,13 @@ for table in table_info: ...@@ -179,6 +173,13 @@ for table in table_info:
179 # Accept if bbox is largely inside the cell, or its center lies inside the cell 173 # Accept if bbox is largely inside the cell, or its center lies inside the cell
180 if overlap_ratio(bbox, cell_box) >= 0.3 or center_inside(bbox, cell_box): 174 if overlap_ratio(bbox, cell_box) >= 0.3 or center_inside(bbox, cell_box):
181 cell_texts.append(item["text"]) 175 cell_texts.append(item["text"])
176 + item["table"] = {
177 + "bbox": {
178 + "table_index": index,
179 + "row_idx": cell["row_idx"],
180 + "col_idx": cell["col_idx"]
181 + }
182 + }
182 183
183 # thêm vào cell gốc 184 # thêm vào cell gốc
184 cell["texts"] = cell_texts 185 cell["texts"] = cell_texts
......