Merge branch '0.8.X' of https://gitee.com/hdka/klp-oa into 0.8.X

2025-08-01 12:57:34 +08:00
parent 5b619eca0a d11ba38337
commit d7a8f68188
2 changed files with 158 additions and 0 deletions
--- a/klp-wms/pom.xml
+++ b/klp-wms/pom.xml
@@ -22,5 +22,13 @@
            <artifactId>klp-common</artifactId>
            <version>0.8.3</version>
        </dependency>
+        <!-- https://mvnrepository.com/artifact/net.sourceforge.tess4j/tess4j -->
+        <dependency>
+            <groupId>net.sourceforge.tess4j</groupId>
+            <artifactId>tess4j</artifactId>
+            <version>5.11.0</version>
+        </dependency>
+
+
    </dependencies>
 </project>
--- a/klp-wms/src/main/java/com/klp/controller/Application.java
+++ b/klp-wms/src/main/java/com/klp/controller/Application.java
@@ -0,0 +1,150 @@
+package com.klp.controller;
+
+import net.sourceforge.tess4j.ITesseract;
+import net.sourceforge.tess4j.Tesseract;
+import net.sourceforge.tess4j.TesseractException;
+
+import javax.imageio.ImageIO;
+import java.awt.*;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+
+public class Application {
+    
+    /**
+     * @Description: 识别图片中的文字
+     * @param args
+     */
+    public static void main(String[] args) {
+        try {
+            // 获取本地图片
+            File file = new File("D:\\桌面\\照片\\11.png");
+            
+            if (!file.exists()) {
+                System.err.println("图片文件不存在: " + file.getAbsolutePath());
+                return;
+            }
+            
+            System.out.println("开始OCR识别...");
+            System.out.println("图片路径: " + file.getAbsolutePath());
+            
+            // 创建Tesseract对象
+            ITesseract tesseract = new Tesseract();
+            
+            // 设置字体库路径
+            tesseract.setDatapath("D:\\front");
+            
+            // 设置语言简体中文
+            tesseract.setLanguage("chi_sim");
+            
+            // 优化OCR配置
+            tesseract.setPageSegMode(6); // 假设统一的文本块
+            tesseract.setOcrEngineMode(3); // 使用默认引擎
+            
+            // 执行OCR识别图片
+            String result = tesseract.doOCR(file);
+            
+            System.out.println("\n=== 原始识别结果 ===");
+            System.out.println(result);
+            
+            // 清理和格式化结果
+            String cleanedResult = cleanOcrResult(result);
+            
+            System.out.println("\n=== 清理后的结果 ===");
+            System.out.println(cleanedResult);
+            
+            // 分行显示结果
+            System.out.println("\n=== 分行显示结果 ===");
+            String[] lines = cleanedResult.split("-");
+            for (int i = 0; i < lines.length; i++) {
+                if (!lines[i].trim().isEmpty()) {
+                    System.out.println((i + 1) + ". " + lines[i].trim());
+                }
+            }
+            
+        } catch (TesseractException e) {
+            System.err.println("Tesseract OCR错误: " + e.getMessage());
+            e.printStackTrace();
+        } catch (Exception e) {
+            System.err.println("其他错误: " + e.getMessage());
+            e.printStackTrace();
+        }
+    }
+    
+    /**
+     * 清理OCR识别结果
+     * @param result 原始识别结果
+     * @return 清理后的结果
+     */
+    private static String cleanOcrResult(String result) {
+        if (result == null || result.trim().isEmpty()) {
+            return "";
+        }
+        
+        // 替换常见的OCR错误
+        String cleaned = result
+            // 替换回车和换行
+            .replaceAll("\\r|\\n", "-")
+            // 移除多余的空格
+            .replaceAll("\\s+", " ")
+            // 修复常见的OCR错误
+            .replaceAll("英声租", "产品名称")
+            .replaceAll("库咤埕号", "产品型号")
+            .replaceAll("产晓序列号", "产品序列号")
+            .replaceAll("购买纳证缉号", "购买凭证编号")
+            .replaceAll("质保条孰", "质保条款")
+            .replaceAll("本亢命", "本产品")
+            .replaceAll("质僚服务", "质保服务")
+            .replaceAll("质保朝内", "质保期内")
+            .replaceAll("团素", "因素")
+            .replaceAll("质量闰题", "质量问题")
+            .replaceAll("免贵维修", "免费维修")
+            .replaceAll("更挺服", "更换服")
+            .replaceAll("不包挂", "不包括")
+            .replaceAll("溢用", "滥用")
+            .replaceAll("探环", "损坏")
+            .replaceAll("取葛保管", "妥善保管")
+            .replaceAll("雷凭吊证明", "需凭此证明")
+            .replaceAll("客户信恩", "客户信息")
+            .replaceAll("姆钗", "姓名")
+            .replaceAll("联系电语", "联系电话")
+            .replaceAll("电子邹件", "电子邮件")
+            .replaceAll("地抛", "地址")
+            .replaceAll("摒权代表", "授权代表")
+            .replaceAll("介为示例", "仅为示例")
+            .replaceAll("根揪实际情况", "根据实际情况")
+            .replaceAll("调教", "调整")
+            // 移除多余的空格
+            .trim();
+        
+        return cleaned;
+    }
+    
+    /**
+     * 图片预处理（可选）
+     * @param inputFile 输入图片
+     * @param outputFile 输出图片
+     */
+    private static void preprocessImage(File inputFile, File outputFile) {
+        try {
+            BufferedImage image = ImageIO.read(inputFile);
+            
+            // 转换为灰度图
+            BufferedImage grayImage = new BufferedImage(
+                image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
+            
+            Graphics2D g2d = grayImage.createGraphics();
+            g2d.drawImage(image, 0, 0, null);
+            g2d.dispose();
+            
+            // 保存预处理后的图片
+            ImageIO.write(grayImage, "png", outputFile);
+            
+            System.out.println("图片预处理完成: " + outputFile.getAbsolutePath());
+            
+        } catch (IOException e) {
+            System.err.println("图片预处理失败: " + e.getMessage());
+        }
+    }
+}