# table_detector.py
import cv2
import numpy as np
import os
def _axis_aligned_positions(segments, horizontal, max_skew=3):
    """Return the mid coordinate of each near-horizontal or near-vertical segment.

    Args:
        segments: Result of ``cv2.HoughLinesP``; each entry wraps one
            ``(x1, y1, x2, y2)`` quadruple. May be ``None``.
        horizontal: ``True`` to keep near-horizontal segments and return their
            y coordinate, ``False`` to keep near-vertical segments and return
            their x coordinate.
        max_skew: Largest allowed difference (in pixels) between the two
            endpoints along the perpendicular axis.

    Returns:
        A list of Python ints, one per accepted segment, in input order.
    """
    positions = []
    if segments is None:
        # The Hough call produced no segments at all.
        return positions
    for segment in segments:
        x1, y1, x2, y2 = segment[0]
        start, end = (y1, y2) if horizontal else (x1, x2)
        if abs(start - end) <= max_skew:
            positions.append(int(round((start + end) / 2)))
    return positions


def _cluster_positions(values, tol=10):
    """Collapse nearby coordinates into one representative per group.

    Values are visited in ascending order. A value opens a new group only when
    it lies more than ``tol`` pixels beyond the previous group's
    representative, which is the smallest value of that group.
    """
    clustered = []
    for value in sorted(values):
        if not clustered or value - clustered[-1] > tol:
            clustered.append(value)
    return clustered


def detect_tables(image_path, ocr_data_list):
    """Detect a ruled table grid in an image from its long straight lines.

    The image is converted to grayscale, blurred and edge-detected; long
    near-horizontal and near-vertical Hough segments are then clustered into
    row and column boundaries. Every pair of neighbouring boundaries forms a
    cell.

    Side effect: the detected cells and the table outline are drawn onto the
    loaded image, which is saved next to the input as ``<stem>_debug.jpg``.

    Args:
        image_path: Path of the image to analyse.
        ocr_data_list: Not used by this function; kept so the signature stays
            compatible with existing callers.

    Returns:
        A list that is empty when fewer than two row boundaries or fewer than
        two column boundaries were found. Otherwise it holds a single dict
        with the keys ``"total_rows"``, ``"total_cols"``, ``"table_box"``
        (``(x_min, y_min, x_max, y_max)``) and ``"cells"`` (a list of rows,
        each a list of dicts with ``"cell"``, ``"row_idx"`` and ``"col_idx"``).

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` for
            ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Không đọc được ảnh: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (3, 3), 0)

    # Edge detection
    edges = cv2.Canny(blur, 50, 150, apertureSize=3)

    height, width = img.shape[0], img.shape[1]

    # --- Horizontal lines: must span at least 60% of the image width ---
    lines_h = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=120,
                              minLineLength=int(width * 0.6), maxLineGap=20)
    # Group y coordinates that belong to the same ruling.
    ys = _cluster_positions(_axis_aligned_positions(lines_h, horizontal=True))
    total_rows = max(0, len(ys) - 1)

    # --- Vertical lines: must span at least 50% of the image height ---
    lines_v = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100,
                              minLineLength=int(height * 0.5), maxLineGap=20)
    # Group x coordinates that belong to the same ruling.
    x_pos = _cluster_positions(_axis_aligned_positions(lines_v, horizontal=False))
    total_cols = max(0, len(x_pos) - 1)

    tables = []
    if total_rows > 0 and total_cols > 0:
        y_min, y_max = ys[0], ys[-1]
        x_min, x_max = x_pos[0], x_pos[-1]
        table_box = (x_min, y_min, x_max, y_max)

        # Build cells from each pair of neighbouring boundaries.
        rows_data = []
        for i in range(total_rows):
            row_cells = []
            for j in range(total_cols):
                cell_box = (x_pos[j], ys[i], x_pos[j + 1], ys[i + 1])
                row_cells.append({
                    "cell": cell_box,
                    "row_idx": i,
                    "col_idx": j
                })
                # Draw the cell
                cv2.rectangle(img, (cell_box[0], cell_box[1]),
                              (cell_box[2], cell_box[3]), (0, 255, 255), 1)
            rows_data.append(row_cells)

        tables.append({
            "total_rows": total_rows,
            "total_cols": total_cols,
            "table_box": table_box,
            "cells": rows_data
        })

        # Draw the table outline last so it sits on top of the cell borders.
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

    # NOTE(review): the source's indentation was lost; the debug image is
    # assumed to be written whether or not a table was found - confirm.
    debug_path = os.path.splitext(image_path)[0] + "_debug.jpg"
    cv2.imwrite(debug_path, img)

    return tables