tien_nemo

fix anh dung keu 22/09

...@@ -141,20 +141,7 @@ for res in results: ...@@ -141,20 +141,7 @@ for res in results:
141 "field": "", 141 "field": "",
142 "hideBorder": False 142 "hideBorder": False
143 }) 143 })
144 - 144 +def overlap_ratio(bbox, cell_box):
145 -# ==== Detect table ====
146 -if debug_file and os.path.exists(debug_file):
147 - image_path = debug_file
148 -table_info = detect_tables(image_path)
149 -
150 -for table in table_info:
151 - for row in table["cells"]: # row là list các cell dict
152 - for cell in row:
153 - x1, y1, x2, y2 = cell["cell"]
154 - cell_texts = []
155 -
156 - # Helper: compute overlap ratio of bbox against cell
157 - def overlap_ratio(bbox, cell_box):
158 ix1 = max(bbox[0], cell_box[0]) 145 ix1 = max(bbox[0], cell_box[0])
159 iy1 = max(bbox[1], cell_box[1]) 146 iy1 = max(bbox[1], cell_box[1])
160 ix2 = min(bbox[2], cell_box[2]) 147 ix2 = min(bbox[2], cell_box[2])
...@@ -164,14 +151,21 @@ for table in table_info: ...@@ -164,14 +151,21 @@ for table in table_info:
164 inter = iw * ih 151 inter = iw * ih
165 bbox_area = max(1, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])) 152 bbox_area = max(1, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))
166 return inter / float(bbox_area) 153 return inter / float(bbox_area)
167 - 154 +def center_inside(bbox, cell_box):
168 - # Helper: check center inside cell
169 - def center_inside(bbox, cell_box):
170 cx = (bbox[0] + bbox[2]) / 2.0 155 cx = (bbox[0] + bbox[2]) / 2.0
171 cy = (bbox[1] + bbox[3]) / 2.0 156 cy = (bbox[1] + bbox[3]) / 2.0
172 return (cx >= cell_box[0] and cx <= cell_box[2] and 157 return (cx >= cell_box[0] and cx <= cell_box[2] and
173 cy >= cell_box[1] and cy <= cell_box[3]) 158 cy >= cell_box[1] and cy <= cell_box[3])
159 +# ==== Detect table ====
160 +if debug_file and os.path.exists(debug_file):
161 + image_path = debug_file
162 +table_info = detect_tables(image_path)
174 163
164 +for index, table in enumerate(table_info):
165 + for row in table["cells"]: # row là list các cell dict
166 + for cell in row:
167 + x1, y1, x2, y2 = cell["cell"]
168 + cell_texts = []
175 cell_box = [x1, y1, x2, y2] 169 cell_box = [x1, y1, x2, y2]
176 for item in ocr_data_list: 170 for item in ocr_data_list:
177 bx1, by1, bx2, by2 = item["bbox"] 171 bx1, by1, bx2, by2 = item["bbox"]
...@@ -179,6 +173,13 @@ for table in table_info: ...@@ -179,6 +173,13 @@ for table in table_info:
179 # Accept if bbox is largely inside the cell, or its center lies inside the cell 173 # Accept if bbox is largely inside the cell, or its center lies inside the cell
180 if overlap_ratio(bbox, cell_box) >= 0.3 or center_inside(bbox, cell_box): 174 if overlap_ratio(bbox, cell_box) >= 0.3 or center_inside(bbox, cell_box):
181 cell_texts.append(item["text"]) 175 cell_texts.append(item["text"])
176 + item["table"] = {
177 + "bbox": {
178 + "table_index": index,
179 + "row_idx": cell["row_idx"],
180 + "col_idx": cell["col_idx"]
181 + }
182 + }
182 183
183 # thêm vào cell gốc 184 # thêm vào cell gốc
184 cell["texts"] = cell_texts 185 cell["texts"] = cell_texts
......