Showing 2 changed files with 17 additions and 3 deletions
| ... | @@ -143,8 +143,22 @@ for res in results: | ... | @@ -143,8 +143,22 @@ for res in results: |
| 143 | }) | 143 | }) |
| 144 | 144 | ||
| 145 | # ==== Detect table ==== | 145 | # ==== Detect table ==== |
| 146 | -table_info = detect_tables(image_path, ocr_data_list) | 146 | +table_info = detect_tables(image_path) |
| 147 | - | 147 | + |
| 148 | +for table in table_info: | ||
| 149 | + for row in table["cells"]: # row là list các cell dict | ||
| 150 | + for cell in row: | ||
| 151 | + x1, y1, x2, y2 = cell["cell"] | ||
| 152 | + cell_texts = [] | ||
| 153 | + | ||
| 154 | + for item in ocr_data_list: | ||
| 155 | + bx1, by1, bx2, by2 = item["bbox"] | ||
| 156 | + if bx1 >= x1 and by1 >= y1 and bx2 <= x2 and by2 <= y2: | ||
| 157 | + cell_texts.append(item["text"]) | ||
| 158 | + | ||
| 159 | + # thêm vào cell gốc | ||
| 160 | + cell["texts"] = cell_texts | ||
| 161 | + cell["text"] = " ".join(cell_texts) | ||
| 148 | # ==== Build JSON ==== | 162 | # ==== Build JSON ==== |
| 149 | final_json = { | 163 | final_json = { |
| 150 | "ocr_data": ocr_data_list, | 164 | "ocr_data": ocr_data_list, | ... | ... |
| ... | @@ -2,7 +2,7 @@ import cv2 | ... | @@ -2,7 +2,7 @@ import cv2 |
| 2 | import numpy as np | 2 | import numpy as np |
| 3 | import os | 3 | import os |
| 4 | 4 | ||
| 5 | -def detect_tables(image_path, ocr_data_list): | 5 | +def detect_tables(image_path): |
| 6 | img = cv2.imread(image_path) | 6 | img = cv2.imread(image_path) |
| 7 | if img is None: | 7 | if img is None: |
| 8 | raise FileNotFoundError(f"Không đọc được ảnh: {image_path}") | 8 | raise FileNotFoundError(f"Không đọc được ảnh: {image_path}") | ... | ... |
-
Please register or sign in to post a comment