tien_nemo

demo

from paddleocr import PaddleOCR
from pdf2image import convert_from_path
import os
import time
import numpy as np
import json
# ==== Config ====
# Source PDF and the folder that receives the rendered page image and the
# OCR JSON produced further down in this script.
pdf_path = "D:/Learning_Tien/OCR/PaddleOCR/pdf/data_picking_detail.pdf"
output_folder = "D:/Learning_Tien/OCR/ocr-mapping/public/image"
os.makedirs(output_folder, exist_ok=True)
# Derive the base name from pdf_path so the output names cannot drift away
# from the input file (evaluates to "data_picking_detail" for the path above).
pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]
# Unix time in whole seconds makes output names differ between runs; two runs
# started within the same second would still produce the same name.
timestamp = int(time.time())
img_base_name = f"{pdf_name}_{timestamp}"

# ==== OCR Init ====
# All three optional stages are switched off explicitly.
ocr = PaddleOCR(
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
    use_textline_orientation=False,
)

# ==== PDF to Image ====
# Only page 1 is requested; any later pages of the PDF are not processed.
pages = convert_from_path(pdf_path, first_page=1, last_page=1)
image_path = os.path.join(output_folder, f"{img_base_name}.jpg")
pages[0].save(image_path, "JPEG")

# ==== Run OCR ====
# NOTE(review): the array is built directly from the rendered page
# (presumably RGB channel order) - confirm the order ocr.predict expects.
image_np = np.array(pages[0])
results = ocr.predict(image_np)
# ==== Convert polygon to bbox ====
def poly_to_bbox(poly):
    """Return the axis-aligned bounding box of a polygon.

    Args:
        poly: Iterable of points. Element 0 of each point is read as x and
            element 1 as y; any further elements are ignored.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` as built-in ``int`` values. Each
        coordinate is converted with ``int()``, so fractional values are
        truncated toward zero.

    Raises:
        ValueError: If ``poly`` contains no points.
    """
    # Materialise once so one-shot iterables are not exhausted by the first
    # of the two coordinate passes below.
    points = list(poly)
    if not points:
        raise ValueError("poly_to_bbox() requires at least one point")
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    # int() also turns non-builtin numeric scalars into plain ints, which
    # keeps the result serialisable by json.dump.
    return [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))]
# ==== Build ocrData ====
# One entry per recognised text line. "field" and "hideBorder" are written
# with placeholder defaults ("" and False) for every entry.
ocr_data_list = []
for res in results:
    # rec_texts and rec_polys are walked in lockstep: the i-th text is paired
    # with the i-th polygon.
    for text, poly in zip(res['rec_texts'], res['rec_polys']):
        bbox = poly_to_bbox(poly)
        ocr_data_list.append({
            "text": text,
            "bbox": bbox,
            "field": "",
            "hideBorder": False
        })

# ==== Save JSON ====
# Reuse img_base_name so the JSON file and the page image always share the
# same "<pdf_name>_<timestamp>" base name.
json_path = os.path.join(output_folder, f"{img_base_name}.json")
with open(json_path, "w", encoding="utf-8") as f:
    # ensure_ascii=False writes non-ASCII text as-is instead of \u escapes.
    json.dump(ocr_data_list, f, ensure_ascii=False, indent=2)
print(f"Saved OCR data JSON to: {json_path}")
[
{
"text": "出庫指示書",
"bbox": [
65,
73,
449,
128
],
"field": "",
"hideBorder": false
},
{
"text": "出庫指示No.",
"bbox": [
1303,
76,
1472,
111
],
"field": "",
"hideBorder": false
},
{
"text": "391189",
"bbox": [
1498,
78,
1604,
110
],
"field": "",
"hideBorder": false
},
{
"text": "2025/06/24",
"bbox": [
952,
94,
1106,
121
],
"field": "",
"hideBorder": false
},
{
"text": "18:57迄",
"bbox": [
1139,
89,
1250,
124
],
"field": "",
"hideBorder": false
},
{
"text": "PAGE1/1",
"bbox": [
1473,
121,
1594,
153
],
"field": "",
"hideBorder": false
},
{
"text": "運送形態",
"bbox": [
83,
145,
239,
184
],
"field": "",
"hideBorder": false
},
{
"text": "30西濃運輸",
"bbox": [
234,
144,
485,
185
],
"field": "",
"hideBorder": false
},
{
"text": "得意先",
"bbox": [
84,
206,
202,
246
],
"field": "",
"hideBorder": false
},
{
"text": "42031(株)フジカケ",
"bbox": [
243,
206,
556,
244
],
"field": "",
"hideBorder": false
},
{
"text": "ミタケ",
"bbox": [
536,
205,
707,
247
],
"field": "",
"hideBorder": false
},
{
"text": "住所",
"bbox": [
84,
266,
179,
298
],
"field": "",
"hideBorder": false
},
{
"text": "〒5050100岐阜県可児郡御嵩町中2411-7",
"bbox": [
207,
267,
834,
297
],
"field": "",
"hideBorder": false
},
{
"text": "電話番号",
"bbox": [
88,
308,
214,
340
],
"field": "",
"hideBorder": false
},
{
"text": "0574673181",
"bbox": [
215,
310,
382,
338
],
"field": "",
"hideBorder": false
},
{
"text": "出庫者",
"bbox": [
925,
331,
1008,
367
],
"field": "",
"hideBorder": false
},
{
"text": "検品者",
"bbox": [
1176,
330,
1261,
366
],
"field": "",
"hideBorder": false
},
{
"text": "包者",
"bbox": [
1431,
331,
1516,
367
],
"field": "",
"hideBorder": false
},
{
"text": "担当者",
"bbox": [
86,
344,
182,
381
],
"field": "",
"hideBorder": false
},
{
"text": "NAS00240渡邊雅章",
"bbox": [
239,
343,
518,
380
],
"field": "",
"hideBorder": false
},
{
"text": "摘要",
"bbox": [
83,
386,
183,
429
],
"field": "",
"hideBorder": false
},
{
"text": "棚番",
"bbox": [
34,
515,
97,
554
],
"field": "",
"hideBorder": false
},
{
"text": "品",
"bbox": [
345,
519,
378,
552
],
"field": "",
"hideBorder": false
},
{
"text": "名",
"bbox": [
423,
519,
456,
551
],
"field": "",
"hideBorder": false
},
{
"text": "規",
"bbox": [
870,
517,
909,
552
],
"field": "",
"hideBorder": false
},
{
"text": "格",
"bbox": [
948,
517,
986,
552
],
"field": "",
"hideBorder": false
},
{
"text": "数量",
"bbox": [
1110,
516,
1174,
554
],
"field": "",
"hideBorder": false
},
{
"text": "受注番号",
"bbox": [
1393,
519,
1505,
551
],
"field": "",
"hideBorder": false
},
{
"text": "B0504",
"bbox": [
39,
567,
124,
600
],
"field": "",
"hideBorder": false
},
{
"text": "ダービー",
"bbox": [
297,
565,
419,
600
],
"field": "",
"hideBorder": false
},
{
"text": "斜ニッパー",
"bbox": [
444,
564,
600,
602
],
"field": "",
"hideBorder": false
},
{
"text": "#30 150MM",
"bbox": [
861,
568,
1014,
602
],
"field": "",
"hideBorder": false
},
{
"text": "2",
"bbox": [
1146,
568,
1176,
606
],
"field": "",
"hideBorder": false
},
{
"text": "(",
"bbox": [
1232,
566,
1256,
601
],
"field": "",
"hideBorder": false
},
{
"text": ")",
"bbox": [
1363,
567,
1385,
600
],
"field": "",
"hideBorder": false
},
{
"text": "250430015",
"bbox": [
1419,
567,
1562,
598
],
"field": "",
"hideBorder": false
},
{
"text": "4562144610607",
"bbox": [
295,
611,
474,
638
],
"field": "",
"hideBorder": false
},
{
"text": "3220060",
"bbox": [
567,
610,
668,
638
],
"field": "",
"hideBorder": false
},
{
"text": "C3101",
"bbox": [
40,
654,
121,
687
],
"field": "",
"hideBorder": false
},
{
"text": "タジマ",
"bbox": [
296,
653,
389,
685
],
"field": "",
"hideBorder": false
},
{
"text": "スーパー墨汁",
"bbox": [
414,
654,
599,
685
],
"field": "",
"hideBorder": false
},
{
"text": "180ML PSB2-180",
"bbox": [
787,
656,
1013,
687
],
"field": "",
"hideBorder": false
},
{
"text": "3",
"bbox": [
1145,
655,
1176,
693
],
"field": "",
"hideBorder": false
},
{
"text": "(",
"bbox": [
1232,
653,
1257,
687
],
"field": "",
"hideBorder": false
},
{
"text": ")",
"bbox": [
1362,
653,
1386,
686
],
"field": "",
"hideBorder": false
},
{
"text": "250430015",
"bbox": [
1420,
655,
1563,
686
],
"field": "",
"hideBorder": false
},
{
"text": "4975364054074",
"bbox": [
295,
698,
474,
725
],
"field": "",
"hideBorder": false
},
{
"text": "550207",
"bbox": [
567,
697,
655,
726
],
"field": "",
"hideBorder": false
},
{
"text": "C3101",
"bbox": [
40,
741,
122,
774
],
"field": "",
"hideBorder": false
},
{
"text": "タジマ",
"bbox": [
295,
738,
390,
774
],
"field": "",
"hideBorder": false
},
{
"text": "雨の日墨汁",
"bbox": [
414,
740,
570,
774
],
"field": "",
"hideBorder": false
},
{
"text": "PSB3-180",
"bbox": [
879,
743,
1013,
774
],
"field": "",
"hideBorder": false
},
{
"text": "2",
"bbox": [
1146,
742,
1176,
780
],
"field": "",
"hideBorder": false
},
{
"text": "(",
"bbox": [
1232,
740,
1257,
774
],
"field": "",
"hideBorder": false
},
{
"text": ")",
"bbox": [
1361,
740,
1386,
774
],
"field": "",
"hideBorder": false
},
{
"text": "250430015",
"bbox": [
1419,
741,
1562,
772
],
"field": "",
"hideBorder": false
},
{
"text": "49270501",
"bbox": [
294,
783,
406,
814
],
"field": "",
"hideBorder": false
},
{
"text": "548140",
"bbox": [
567,
783,
655,
812
],
"field": "",
"hideBorder": false
},
{
"text": "明细行数= 3",
"bbox": [
882,
822,
1136,
871
],
"field": "",
"hideBorder": false
}
]
\ No newline at end of file
This diff is collapsed. Click to expand it.