Showing
1 changed file
with
23 additions
and
22 deletions
| ... | @@ -141,37 +141,31 @@ for res in results: | ... | @@ -141,37 +141,31 @@ for res in results: |
| 141 | "field": "", | 141 | "field": "", |
| 142 | "hideBorder": False | 142 | "hideBorder": False |
| 143 | }) | 143 | }) |
| 144 | - | 144 | +def overlap_ratio(bbox, cell_box): |
| 145 | + ix1 = max(bbox[0], cell_box[0]) | ||
| 146 | + iy1 = max(bbox[1], cell_box[1]) | ||
| 147 | + ix2 = min(bbox[2], cell_box[2]) | ||
| 148 | + iy2 = min(bbox[3], cell_box[3]) | ||
| 149 | + iw = max(0, ix2 - ix1) | ||
| 150 | + ih = max(0, iy2 - iy1) | ||
| 151 | + inter = iw * ih | ||
| 152 | + bbox_area = max(1, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])) | ||
| 153 | + return inter / float(bbox_area) | ||
| 154 | +def center_inside(bbox, cell_box): | ||
| 155 | + cx = (bbox[0] + bbox[2]) / 2.0 | ||
| 156 | + cy = (bbox[1] + bbox[3]) / 2.0 | ||
| 157 | + return (cx >= cell_box[0] and cx <= cell_box[2] and | ||
| 158 | + cy >= cell_box[1] and cy <= cell_box[3]) | ||
| 145 | # ==== Detect table ==== | 159 | # ==== Detect table ==== |
| 146 | if debug_file and os.path.exists(debug_file): | 160 | if debug_file and os.path.exists(debug_file): |
| 147 | image_path = debug_file | 161 | image_path = debug_file |
| 148 | table_info = detect_tables(image_path) | 162 | table_info = detect_tables(image_path) |
| 149 | 163 | ||
| 150 | -for table in table_info: | 164 | +for index, table in enumerate(table_info): |
| 151 | for row in table["cells"]: # row là list các cell dict | 165 | for row in table["cells"]: # row là list các cell dict |
| 152 | for cell in row: | 166 | for cell in row: |
| 153 | x1, y1, x2, y2 = cell["cell"] | 167 | x1, y1, x2, y2 = cell["cell"] |
| 154 | cell_texts = [] | 168 | cell_texts = [] |
| 155 | - | ||
| 156 | - # Helper: compute overlap ratio of bbox against cell | ||
| 157 | - def overlap_ratio(bbox, cell_box): | ||
| 158 | - ix1 = max(bbox[0], cell_box[0]) | ||
| 159 | - iy1 = max(bbox[1], cell_box[1]) | ||
| 160 | - ix2 = min(bbox[2], cell_box[2]) | ||
| 161 | - iy2 = min(bbox[3], cell_box[3]) | ||
| 162 | - iw = max(0, ix2 - ix1) | ||
| 163 | - ih = max(0, iy2 - iy1) | ||
| 164 | - inter = iw * ih | ||
| 165 | - bbox_area = max(1, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])) | ||
| 166 | - return inter / float(bbox_area) | ||
| 167 | - | ||
| 168 | - # Helper: check center inside cell | ||
| 169 | - def center_inside(bbox, cell_box): | ||
| 170 | - cx = (bbox[0] + bbox[2]) / 2.0 | ||
| 171 | - cy = (bbox[1] + bbox[3]) / 2.0 | ||
| 172 | - return (cx >= cell_box[0] and cx <= cell_box[2] and | ||
| 173 | - cy >= cell_box[1] and cy <= cell_box[3]) | ||
| 174 | - | ||
| 175 | cell_box = [x1, y1, x2, y2] | 169 | cell_box = [x1, y1, x2, y2] |
| 176 | for item in ocr_data_list: | 170 | for item in ocr_data_list: |
| 177 | bx1, by1, bx2, by2 = item["bbox"] | 171 | bx1, by1, bx2, by2 = item["bbox"] |
| ... | @@ -179,6 +173,13 @@ for table in table_info: | ... | @@ -179,6 +173,13 @@ for table in table_info: |
| 179 | # Accept if bbox is largely inside the cell, or its center lies inside the cell | 173 | # Accept if bbox is largely inside the cell, or its center lies inside the cell |
| 180 | if overlap_ratio(bbox, cell_box) >= 0.3 or center_inside(bbox, cell_box): | 174 | if overlap_ratio(bbox, cell_box) >= 0.3 or center_inside(bbox, cell_box): |
| 181 | cell_texts.append(item["text"]) | 175 | cell_texts.append(item["text"]) |
| 176 | + item["table"] = { | ||
| 177 | + "bbox": { | ||
| 178 | + "table_index": index, | ||
| 179 | + "row_idx": cell["row_idx"], | ||
| 180 | + "col_idx": cell["col_idx"] | ||
| 181 | + } | ||
| 182 | + } | ||
| 182 | 183 | ||
| 183 | # thêm vào cell gốc | 184 | # thêm vào cell gốc |
| 184 | cell["texts"] = cell_texts | 185 | cell["texts"] = cell_texts | ... | ... |
-
Please register or sign in to post a comment