table_detector.py 2.71 KB

Raw Blame History Permalink

import cv2
import numpy as np
import os

def _cluster_positions(values, tol=10):
    """Collapse coordinates closer than ``tol`` into one representative each.

    Values are visited in ascending order; a value starts a new cluster only
    when it lies more than ``tol`` beyond the most recently kept
    representative, so each cluster is represented by its smallest member.
    """
    clustered = []
    for value in sorted(values):
        # Input is sorted, so ``value - clustered[-1]`` is never negative and
        # no abs() is required.
        if not clustered or value - clustered[-1] > tol:
            clustered.append(value)
    return clustered


def _axis_aligned_positions(edges, min_length, threshold, *, horizontal,
                            max_skew=3):
    """Return the mid-coordinate of every near-axis-aligned Hough segment.

    For ``horizontal=True`` the y mid-point of segments whose end points
    differ by at most ``max_skew`` pixels in y is returned; otherwise the x
    mid-point of segments whose end points differ by at most ``max_skew``
    pixels in x.
    """
    segments = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=threshold,
                               minLineLength=min_length, maxLineGap=20)
    if segments is None:
        # HoughLinesP yields None (not an empty array) when nothing is found.
        return []

    positions = []
    for segment in segments:
        x1, y1, x2, y2 = segment[0]
        start, end = (y1, y2) if horizontal else (x1, x2)
        if abs(start - end) <= max_skew:
            positions.append(int(round((start + end) / 2)))
    return positions


def detect_tables(image_path, ocr_data_list):
    """Detect a ruled table grid in an image and return its cell layout.

    The image is converted to grayscale, blurred and run through Canny edge
    detection.  Long, nearly horizontal segments (at least 60% of the image
    width) and long, nearly vertical segments (at least 50% of the image
    height) are extracted with a probabilistic Hough transform, and their
    positions are merged with a 10-pixel tolerance to obtain the row and
    column separators of a single grid.

    As a side effect, an annotated copy of the image (cell outlines plus the
    table border, when a table is found) is always written next to the input
    as ``<image path without extension>_debug.jpg``.

    Args:
        image_path: Path of the image file to analyse.
        ocr_data_list: Accepted for interface compatibility; not used by
            this function.

    Returns:
        A list holding at most one dict with the keys ``"total_rows"``,
        ``"total_cols"``, ``"table_box"`` (``(x_min, y_min, x_max, y_max)``)
        and ``"cells"`` (a list of rows, each a list of dicts with ``"cell"``
        as ``(x1, y1, x2, y2)``, ``"row_idx"`` and ``"col_idx"``).  The list
        is empty when fewer than two horizontal or two vertical separators
        are found.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Không đọc được ảnh: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (3, 3), 0)

    # Edge detection
    edges = cv2.Canny(blur, 50, 150, apertureSize=3)

    height, width = img.shape[:2]

    # Row separators: y positions of long horizontal lines, de-duplicated.
    ys = _cluster_positions(
        _axis_aligned_positions(edges, int(width * 0.6), 120, horizontal=True)
    )
    # Column separators: x positions of long vertical lines, de-duplicated.
    x_pos = _cluster_positions(
        _axis_aligned_positions(edges, int(height * 0.5), 100, horizontal=False)
    )

    # N separators enclose N - 1 rows / columns.
    total_rows = max(0, len(ys) - 1)
    total_cols = max(0, len(x_pos) - 1)

    tables = []
    if total_rows > 0 and total_cols > 0:
        y_min, y_max = ys[0], ys[-1]
        x_min, x_max = x_pos[0], x_pos[-1]
        table_box = (x_min, y_min, x_max, y_max)

        # Build one cell per pair of adjacent separators.
        rows_data = []
        for i, (top, bottom) in enumerate(zip(ys, ys[1:])):
            row_cells = []
            for j, (left, right) in enumerate(zip(x_pos, x_pos[1:])):
                row_cells.append({
                    "cell": (left, top, right, bottom),
                    "row_idx": i,
                    "col_idx": j
                })
                # Outline the cell on the debug image.
                cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 255), 1)
            rows_data.append(row_cells)

        tables.append({
            "total_rows": int(total_rows),
            "total_cols": int(total_cols),
            "table_box": table_box,
            "cells": rows_data
        })

        # Draw the table border on top of the cell outlines.
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

    debug_path = os.path.splitext(image_path)[0] + "_debug.jpg"
    cv2.imwrite(debug_path, img)

    return tables