tien_nemo

demo2

...@@ -22,6 +22,7 @@ class OcrController extends Controller ...@@ -22,6 +22,7 @@ class OcrController extends Controller
22 'customer_name_xy' => 'required|string', 22 'customer_name_xy' => 'required|string',
23 ]); 23 ]);
24 $dataDetail = $request->fields ?? []; 24 $dataDetail = $request->fields ?? [];
25 + $tableColumns = $request->table_columns ?? [];
25 try { 26 try {
26 $masterTemplate = MstTemplate::updateOrCreate( 27 $masterTemplate = MstTemplate::updateOrCreate(
27 ['tpl_name' => $request->template_name], 28 ['tpl_name' => $request->template_name],
...@@ -49,6 +50,24 @@ class OcrController extends Controller ...@@ -49,6 +50,24 @@ class OcrController extends Controller
49 ); 50 );
50 51
51 } 52 }
53 +
54 + // Lưu mapping cột bảng (lưu col index vào field_xy với field_name đặc biệt)
55 + if (!empty($tableColumns) && is_array($tableColumns)) {
56 + foreach ($tableColumns as $name => $colIdx) {
57 + if ($colIdx === null || $colIdx === '' || $colIdx === false) {
58 + continue;
59 + }
60 + DtTemplate::updateOrInsert(
61 + [
62 + 'tpl_id' => $masterTemplate->id,
63 + 'field_name' => '__table_col__' . $name,
64 + ],
65 + [
66 + 'field_xy' => (string) $colIdx,
67 + ]
68 + );
69 + }
70 + }
52 return response()->json([ 71 return response()->json([
53 'success' => true, 72 'success' => true,
54 'message' => 'Lưu template thành công', 73 'message' => 'Lưu template thành công',
...@@ -70,8 +89,8 @@ class OcrController extends Controller ...@@ -70,8 +89,8 @@ class OcrController extends Controller
70 $templateName = $request->get('template_name', ''); 89 $templateName = $request->get('template_name', '');
71 90
72 // Giả sử file OCR JSON & ảnh nằm trong storage/app/public/image/ 91 // Giả sử file OCR JSON & ảnh nằm trong storage/app/public/image/
73 - $jsonPath = public_path("image/3_1757295841_with_table.json"); 92 + $jsonPath = public_path("image/nemo_new_1757393338_with_table.json");
74 - $imgPath = ("image/3_1757295841.jpg"); 93 + $imgPath = ("image/nemo_new_1757393338.jpg");
75 94
76 if (!file_exists($jsonPath)) { 95 if (!file_exists($jsonPath)) {
77 return response()->json(['error' => 'File OCR JSON không tìm thấy'], 404); 96 return response()->json(['error' => 'File OCR JSON không tìm thấy'], 404);
...@@ -100,7 +119,13 @@ class OcrController extends Controller ...@@ -100,7 +119,13 @@ class OcrController extends Controller
100 // Lấy detail của template 119 // Lấy detail của template
101 $details = DtTemplate::where('tpl_id', $mst->id)->get(); 120 $details = DtTemplate::where('tpl_id', $mst->id)->get();
102 121
122 + $tableColumnMapping = [];
103 foreach ($details as $detail) { 123 foreach ($details as $detail) {
124 + if (strpos($detail->field_name, '__table_col__') === 0) {
125 + $name = substr($detail->field_name, strlen('__table_col__'));
126 + $tableColumnMapping[$name] = is_numeric($detail->field_xy) ? intval($detail->field_xy) : null;
127 + continue;
128 + }
104 $coords = array_map('intval', explode(',', $detail->field_xy)); 129 $coords = array_map('intval', explode(',', $detail->field_xy));
105 // coords = [x1, y1, x2, y2] 130 // coords = [x1, y1, x2, y2]
106 131
...@@ -130,6 +155,7 @@ class OcrController extends Controller ...@@ -130,6 +155,7 @@ class OcrController extends Controller
130 'pdfImageUrl' => $imgPath, 155 'pdfImageUrl' => $imgPath,
131 'dataMapping' => $dataMapping, 156 'dataMapping' => $dataMapping,
132 'is_template' => $is_template, 157 'is_template' => $is_template,
158 + 'tableColumnMapping' => $tableColumnMapping ?? [], //?? new \stdClass(),
133 'fieldOptions' => [ 159 'fieldOptions' => [
134 [ 'value' => 'template_name', 'label' => 'Tên Mẫu PDF' ], 160 [ 'value' => 'template_name', 'label' => 'Tên Mẫu PDF' ],
135 [ 'value' => 'customer_name', 'label' => 'Tên khách hàng' ], 161 [ 'value' => 'customer_name', 'label' => 'Tên khách hàng' ],
......
...@@ -11,14 +11,14 @@ from PIL import Image, ImageEnhance ...@@ -11,14 +11,14 @@ from PIL import Image, ImageEnhance
11 11
12 # ==== Config ==== 12 # ==== Config ====
13 BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) 13 BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
14 -# PDF_NAME = 'aaaa' 14 +PDF_NAME = 'nemo_new'
15 15
16 # PDF path 16 # PDF path
17 -pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "3.pdf" 17 +pdf_path = Path(BASE_DIR) / "storage" / "pdf" / "2.pdf"
18 # Output folder 18 # Output folder
19 output_folder = Path(BASE_DIR) / "public" / "image" 19 output_folder = Path(BASE_DIR) / "public" / "image"
20 20
21 -PDF_NAME = pdf_path.stem # Get the stem of the PDF file 21 +# PDF_NAME = pdf_path.stem # Get the stem of the PDF file
22 #print(PDF_NAME) 22 #print(PDF_NAME)
23 23
24 os.makedirs(output_folder, exist_ok=True) 24 os.makedirs(output_folder, exist_ok=True)
...@@ -151,9 +151,31 @@ for table in table_info: ...@@ -151,9 +151,31 @@ for table in table_info:
151 x1, y1, x2, y2 = cell["cell"] 151 x1, y1, x2, y2 = cell["cell"]
152 cell_texts = [] 152 cell_texts = []
153 153
154 + # Helper: compute overlap ratio of bbox against cell
155 + def overlap_ratio(bbox, cell_box):
156 + ix1 = max(bbox[0], cell_box[0])
157 + iy1 = max(bbox[1], cell_box[1])
158 + ix2 = min(bbox[2], cell_box[2])
159 + iy2 = min(bbox[3], cell_box[3])
160 + iw = max(0, ix2 - ix1)
161 + ih = max(0, iy2 - iy1)
162 + inter = iw * ih
163 + bbox_area = max(1, (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]))
164 + return inter / float(bbox_area)
165 +
166 + # Helper: check center inside cell
167 + def center_inside(bbox, cell_box):
168 + cx = (bbox[0] + bbox[2]) / 2.0
169 + cy = (bbox[1] + bbox[3]) / 2.0
170 + return (cx >= cell_box[0] and cx <= cell_box[2] and
171 + cy >= cell_box[1] and cy <= cell_box[3])
172 +
173 + cell_box = [x1, y1, x2, y2]
154 for item in ocr_data_list: 174 for item in ocr_data_list:
155 bx1, by1, bx2, by2 = item["bbox"] 175 bx1, by1, bx2, by2 = item["bbox"]
156 - if bx1 >= x1 and by1 >= y1 and bx2 <= x2 and by2 <= y2: 176 + bbox = [bx1, by1, bx2, by2]
177 + # Accept if bbox is largely inside the cell, or its center lies inside the cell
178 + if overlap_ratio(bbox, cell_box) >= 0.3 or center_inside(bbox, cell_box):
157 cell_texts.append(item["text"]) 179 cell_texts.append(item["text"])
158 180
159 # thêm vào cell gốc 181 # thêm vào cell gốc
......
...@@ -2,46 +2,73 @@ import cv2 ...@@ -2,46 +2,73 @@ import cv2
2 import numpy as np 2 import numpy as np
3 import os 3 import os
4 4
5 -def detect_tables(image_path): 5 +def filter_horizontal_lines(lines_h, img_width, min_h_len_ratio=0.8, tol_y=10):
6 + if lines_h is None:
7 + return [], []
8 +
9 + ys_candidates = []
10 + for l in lines_h:
11 + x1, y1, x2, y2 = l[0]
12 + if abs(y1 - y2) <= 3: # ngang
13 + line_len = abs(x2 - x1)
14 + y_mid = int(round((y1 + y2) / 2))
15 + ys_candidates.append((y_mid, line_len, x1, x2))
16 +
17 + ys_candidates.sort(key=lambda x: x[0])
18 + filtered_lines, line_segments, current_group = [], [], []
19 +
20 + for y, length, x1, x2 in ys_candidates:
21 + if not current_group:
22 + current_group.append((y, length, x1, x2))
23 + else:
24 + if abs(y - current_group[-1][0]) <= tol_y:
25 + current_group.append((y, length, x1, x2))
26 + else:
27 + longest = max(current_group, key=lambda x: x[1])
28 + if longest[1] >= min_h_len_ratio * img_width:
29 + filtered_lines.append(longest[0])
30 + line_segments.append((longest[2], longest[3], longest[0]))
31 + else:
32 + break
33 + current_group = [(y, length, x1, x2)]
34 +
35 + if current_group:
36 + longest = max(current_group, key=lambda x: x[1])
37 + if longest[1] >= min_h_len_ratio * img_width:
38 + filtered_lines.append(longest[0])
39 + line_segments.append((longest[2], longest[3], longest[0]))
40 +
41 + total_rows = max(0, len(filtered_lines) - 1)
42 + print(f"Detected {total_rows} rows")
43 + return filtered_lines, line_segments
44 +
45 +
46 +def detect_tables(image_path, gap_threshold=50):
6 img = cv2.imread(image_path) 47 img = cv2.imread(image_path)
7 if img is None: 48 if img is None:
8 raise FileNotFoundError(f"Không đọc được ảnh: {image_path}") 49 raise FileNotFoundError(f"Không đọc được ảnh: {image_path}")
9 50
10 gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 51 gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
11 blur = cv2.GaussianBlur(gray, (3, 3), 0) 52 blur = cv2.GaussianBlur(gray, (3, 3), 0)
12 -
13 - # Edge detection
14 edges = cv2.Canny(blur, 50, 150, apertureSize=3) 53 edges = cv2.Canny(blur, 50, 150, apertureSize=3)
15 54
16 # --- Horizontal lines --- 55 # --- Horizontal lines ---
17 lines_h = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=120, 56 lines_h = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=120,
18 minLineLength=int(img.shape[1] * 0.6), maxLineGap=20) 57 minLineLength=int(img.shape[1] * 0.6), maxLineGap=20)
19 - ys_candidates, line_segments = [], [] 58 + img_height, img_width = img.shape[:2]
20 - if lines_h is not None: 59 + ys, line_segments = filter_horizontal_lines(lines_h, img_width, min_h_len_ratio=0.8, tol_y=10)
21 - for l in lines_h:
22 - x1, y1, x2, y2 = l[0]
23 - if abs(y1 - y2) <= 3: # ngang
24 - y_mid = int(round((y1 + y2) / 2))
25 - ys_candidates.append(y_mid)
26 - line_segments.append((x1, x2, y_mid))
27 -
28 - # gom nhóm y
29 - ys, tol_y = [], 10
30 - for y in sorted(ys_candidates):
31 - if not ys or abs(y - ys[-1]) > tol_y:
32 - ys.append(y)
33 -
34 total_rows = max(0, len(ys) - 1) 60 total_rows = max(0, len(ys) - 1)
35 61
36 # --- Vertical lines --- 62 # --- Vertical lines ---
37 lines_v = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, 63 lines_v = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100,
38 - minLineLength=int(img.shape[0] * 0.5), maxLineGap=20) 64 + minLineLength=int(img.shape[0] * 0.4), maxLineGap=20)
39 - xs = [] 65 + v_lines, xs = [], []
40 if lines_v is not None: 66 if lines_v is not None:
41 for l in lines_v: 67 for l in lines_v:
42 x1, y1, x2, y2 = l[0] 68 x1, y1, x2, y2 = l[0]
43 if abs(x1 - x2) <= 3: 69 if abs(x1 - x2) <= 3:
44 xs.append(int(round((x1 + x2) / 2))) 70 xs.append(int(round((x1 + x2) / 2)))
71 + v_lines.append((int(round((x1 + x2) / 2)), min(y1, y2), max(y1, y2)))
45 72
46 # gom nhóm x 73 # gom nhóm x
47 x_pos, tol_v = [], 10 74 x_pos, tol_v = [], 10
...@@ -50,26 +77,66 @@ def detect_tables(image_path): ...@@ -50,26 +77,66 @@ def detect_tables(image_path):
50 x_pos.append(v) 77 x_pos.append(v)
51 78
52 total_cols = max(0, len(x_pos) - 1) 79 total_cols = max(0, len(x_pos) - 1)
53 -
54 tables = [] 80 tables = []
81 +
55 if total_rows > 0 and total_cols > 0: 82 if total_rows > 0 and total_cols > 0:
56 y_min, y_max = ys[0], ys[-1] 83 y_min, y_max = ys[0], ys[-1]
57 x_min, x_max = x_pos[0], x_pos[-1] 84 x_min, x_max = x_pos[0], x_pos[-1]
58 table_box = (x_min, y_min, x_max, y_max) 85 table_box = (x_min, y_min, x_max, y_max)
59 86
60 - # build cells
61 rows_data = [] 87 rows_data = []
62 for i in range(total_rows): 88 for i in range(total_rows):
63 row_cells = [] 89 row_cells = []
64 - for j in range(total_cols): 90 + j = 0
91 + while j < total_cols:
65 cell_box = (x_pos[j], ys[i], x_pos[j+1], ys[i+1]) 92 cell_box = (x_pos[j], ys[i], x_pos[j+1], ys[i+1])
93 + row_height = cell_box[3] - cell_box[1]
94 +
95 + # Check vertical line coverage (>=70% chiều cao hàng)
96 + has_left = any(
97 + abs(x - cell_box[0]) <= tol_v and
98 + (min(y_end, cell_box[3]) - max(y_start, cell_box[1])) >= 0.7 * row_height
99 + for x, y_start, y_end in v_lines
100 + )
101 + has_right = any(
102 + abs(x - cell_box[2]) <= tol_v and
103 + (min(y_end, cell_box[3]) - max(y_start, cell_box[1])) >= 0.7 * row_height
104 + for x, y_start, y_end in v_lines
105 + )
106 +
107 + if has_left and has_right:
108 + col_start = j
109 + col_end = j
110 + # nếu cột tiếp theo không có line → merge
111 + while col_end + 1 < total_cols:
112 + next_box = (x_pos[col_end+1], ys[i], x_pos[col_end+2], ys[i+1])
113 + has_next_left = any(
114 + abs(x - next_box[0]) <= tol_v and
115 + (min(y_end, next_box[3]) - max(y_start, next_box[1])) >= 0.7 * row_height
116 + for x, y_start, y_end in v_lines
117 + )
118 + if not has_next_left: # merge tiếp
119 + col_end += 1
120 + else:
121 + break
122 +
123 + merged_box = (x_pos[col_start], ys[i], x_pos[col_end+1], ys[i+1])
124 + if col_start == col_end:
125 + col_id = col_start
126 + else:
127 + col_id = f"{col_start}-{col_end}"
128 +
66 row_cells.append({ 129 row_cells.append({
67 - "cell": cell_box, 130 + "cell": merged_box,
68 "row_idx": i, 131 "row_idx": i,
69 - "col_idx": j 132 + "col_idx": col_id
70 }) 133 })
71 - # Vẽ ô 134 + cv2.rectangle(img, (merged_box[0], merged_box[1]),
72 - cv2.rectangle(img, (cell_box[0], cell_box[1]), (cell_box[2], cell_box[3]), (0, 255, 255), 1) 135 + (merged_box[2], merged_box[3]), (0, 255, 255), 1)
136 + j = col_end + 1
137 + else:
138 + j += 1 # skip ô lỗi (không có line đầy đủ)
139 +
73 rows_data.append(row_cells) 140 rows_data.append(row_cells)
74 141
75 tables.append({ 142 tables.append({
...@@ -78,11 +145,9 @@ def detect_tables(image_path): ...@@ -78,11 +145,9 @@ def detect_tables(image_path):
78 "table_box": table_box, 145 "table_box": table_box,
79 "cells": rows_data 146 "cells": rows_data
80 }) 147 })
81 -
82 - # vẽ viền bảng
83 cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2) 148 cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
84 149
85 - debug_path = os.path.splitext(image_path)[0] + "_debug.jpg" 150 + debug_path = os.path.splitext(image_path)[0] + "_fix_debug.jpg"
86 cv2.imwrite(debug_path, img) 151 cv2.imwrite(debug_path, img)
87 152
88 return tables 153 return tables
......