tien_nemo

Nếu chữ mảnh thì tăng độ đậm

...@@ -14,7 +14,7 @@ BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", " ...@@ -14,7 +14,7 @@ BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "
14 # PDF_NAME = 'aaaa' 14 # PDF_NAME = 'aaaa'
15 15
16 # PDF path 16 # PDF path
17 -pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "Iwasaki_1.pdf" 17 +pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "3.pdf"
18 # Output folder 18 # Output folder
19 output_folder = Path(BASE_DIR) / "public" / "image" 19 output_folder = Path(BASE_DIR) / "public" / "image"
20 20
...@@ -104,6 +104,9 @@ def bolden_text(rgb_img: np.ndarray, ...@@ -104,6 +104,9 @@ def bolden_text(rgb_img: np.ndarray,
104 104
105 # Trả về RGB cho PaddleOCR 105 # Trả về RGB cho PaddleOCR
106 out_rgb = cv2.cvtColor(out_gray, cv2.COLOR_GRAY2RGB) 106 out_rgb = cv2.cvtColor(out_gray, cv2.COLOR_GRAY2RGB)
107 + debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg")
108 + cv2.imwrite(debug_path, cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR))
109 + print(f"[DEBUG] Preprocessed image saved to: {debug_path}")
107 return out_rgb 110 return out_rgb
108 111
109 preprocessed = bolden_text( 112 preprocessed = bolden_text(
...@@ -114,9 +117,7 @@ preprocessed = bolden_text( ...@@ -114,9 +117,7 @@ preprocessed = bolden_text(
114 sharpness=1.2 # >1.0 để nét hơn 117 sharpness=1.2 # >1.0 để nét hơn
115 ) 118 )
116 119
117 -debug_path = os.path.join(output_folder, f"{img_base_name}_preprocessed_debug.jpg") 120 +
118 -cv2.imwrite(debug_path, cv2.cvtColor(preprocessed, cv2.COLOR_RGB2BGR))
119 -print(f"[DEBUG] Preprocessed image saved to: {debug_path}")
120 121
121 # Gọi OCR (đảm bảo 3 kênh) 122 # Gọi OCR (đảm bảo 3 kênh)
122 if preprocessed.ndim == 2: 123 if preprocessed.ndim == 2:
...@@ -142,7 +143,7 @@ for res in results: ...@@ -142,7 +143,7 @@ for res in results:
142 }) 143 })
143 144
144 # ==== Detect table ==== 145 # ==== Detect table ====
145 -table_info = detect_tables(image_path) 146 +table_info = detect_tables(image_path, ocr_data_list)
146 147
147 # ==== Build JSON ==== 148 # ==== Build JSON ====
148 final_json = { 149 final_json = {
......
...@@ -2,7 +2,7 @@ import cv2 ...@@ -2,7 +2,7 @@ import cv2
2 import numpy as np 2 import numpy as np
3 import os 3 import os
4 4
5 -def detect_tables(image_path): 5 +def detect_tables(image_path, ocr_data_list):
6 img = cv2.imread(image_path) 6 img = cv2.imread(image_path)
7 if img is None: 7 if img is None:
8 raise FileNotFoundError(f"Không đọc được ảnh: {image_path}") 8 raise FileNotFoundError(f"Không đọc được ảnh: {image_path}")
...@@ -25,7 +25,7 @@ def detect_tables(image_path): ...@@ -25,7 +25,7 @@ def detect_tables(image_path):
25 ys_candidates.append(y_mid) 25 ys_candidates.append(y_mid)
26 line_segments.append((x1, x2, y_mid)) 26 line_segments.append((x1, x2, y_mid))
27 27
28 - # gom nhóm các y 28 + # gom nhóm y
29 ys, tol_y = [], 10 29 ys, tol_y = [], 10
30 for y in sorted(ys_candidates): 30 for y in sorted(ys_candidates):
31 if not ys or abs(y - ys[-1]) > tol_y: 31 if not ys or abs(y - ys[-1]) > tol_y:
...@@ -43,7 +43,7 @@ def detect_tables(image_path): ...@@ -43,7 +43,7 @@ def detect_tables(image_path):
43 if abs(x1 - x2) <= 3: 43 if abs(x1 - x2) <= 3:
44 xs.append(int(round((x1 + x2) / 2))) 44 xs.append(int(round((x1 + x2) / 2)))
45 45
46 - # gom nhóm cột 46 + # gom nhóm x
47 x_pos, tol_v = [], 10 47 x_pos, tol_v = [], 10
48 for v in sorted(xs): 48 for v in sorted(xs):
49 if not x_pos or v - x_pos[-1] > tol_v: 49 if not x_pos or v - x_pos[-1] > tol_v:
...@@ -52,26 +52,35 @@ def detect_tables(image_path): ...@@ -52,26 +52,35 @@ def detect_tables(image_path):
52 total_cols = max(0, len(x_pos) - 1) 52 total_cols = max(0, len(x_pos) - 1)
53 53
54 tables = [] 54 tables = []
55 - if len(ys) >= 3 and line_segments: 55 + if total_rows > 0 and total_cols > 0:
56 y_min, y_max = ys[0], ys[-1] 56 y_min, y_max = ys[0], ys[-1]
57 - min_x = min(seg[0] for seg in line_segments) 57 + x_min, x_max = x_pos[0], x_pos[-1]
58 - max_x = max(seg[1] for seg in line_segments) 58 + table_box = (x_min, y_min, x_max, y_max)
59 - table_box = (min_x, y_min, max_x, y_max)
60 59
61 - rows = [] 60 + # build cells
62 - for i in range(len(ys) - 1): 61 + rows_data = []
63 - row_box = (min_x, ys[i], max_x, ys[i+1]) 62 + for i in range(total_rows):
64 - rows.append({"row": tuple(int(v) for v in row_box)}) 63 + row_cells = []
65 - cv2.rectangle(img, (row_box[0], row_box[1]), (row_box[2], row_box[3]), (0, 255, 255), 2) 64 + for j in range(total_cols):
65 + cell_box = (x_pos[j], ys[i], x_pos[j+1], ys[i+1])
66 + row_cells.append({
67 + "cell": cell_box,
68 + "row_idx": i,
69 + "col_idx": j
70 + })
71 + # Vẽ ô
72 + cv2.rectangle(img, (cell_box[0], cell_box[1]), (cell_box[2], cell_box[3]), (0, 255, 255), 1)
73 + rows_data.append(row_cells)
66 74
67 tables.append({ 75 tables.append({
68 "total_rows": int(total_rows), 76 "total_rows": int(total_rows),
69 "total_cols": int(total_cols), 77 "total_cols": int(total_cols),
70 - "table_box": tuple(int(v) for v in table_box), 78 + "table_box": table_box,
71 - "rows_box": rows 79 + "cells": rows_data
72 }) 80 })
73 81
74 - cv2.rectangle(img, (min_x, y_min), (max_x, y_max), (255, 0, 0), 3) 82 + # vẽ viền bảng
83 + cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
75 84
76 debug_path = os.path.splitext(image_path)[0] + "_debug.jpg" 85 debug_path = os.path.splitext(image_path)[0] + "_debug.jpg"
77 cv2.imwrite(debug_path, img) 86 cv2.imwrite(debug_path, img)
......