diff --git a/klp-wms/src/main/java/com/klp/controller/Application.java b/klp-wms/src/main/java/com/klp/controller/Application.java
index 9e7c5f93..be90f5d5 100644
--- a/klp-wms/src/main/java/com/klp/controller/Application.java
+++ b/klp-wms/src/main/java/com/klp/controller/Application.java
@@ -8,10 +8,15 @@ import javax.imageio.ImageIO;
 import java.awt.*;
 import java.awt.image.BufferedImage;
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
 
 public class Application {
-    
+
     /**
      * @Description: 识别图片中的文字
      * @param args
@@ -19,41 +24,54 @@ public class Application {
     public static void main(String[] args) {
         try {
             // 获取本地图片
-            File file = new File("D:\\桌面\\照片\\11.png");
-            
-            if (!file.exists()) {
-                System.err.println("图片文件不存在: " + file.getAbsolutePath());
-                return;
-            }
-            
+            // File file = new File("D:\\test.png");
+
+            String imageUrl = "http://47.117.71.33:11296/api/v1/buckets/fadapp-update/objects/download?preview=true&prefix=%E5%BE%AE%E4%BF%A1%E5%9B%BE%E7%89%87_20250731172707.png&version_id=null";
+            // Fetch and decode the remote image. try-with-resources closes the
+            // stream even when ImageIO.read throws.
+            BufferedImage image;
+            try (InputStream inputStream = new URL(imageUrl).openStream()) {
+                image = ImageIO.read(inputStream);
+            }
+            // ImageIO.read returns null (no exception) when no registered reader
+            // can decode the data, e.g. when the server answers with an error page.
+            if (image == null) {
+                System.err.println("无法解码图片: " + imageUrl);
+                return;
+            }
+            // Convert to grayscale before recognition.
+            BufferedImage bufferedImage = preprocessImage(image);
+
             System.out.println("开始OCR识别...");
-            System.out.println("图片路径: " + file.getAbsolutePath());
-            
+
             // 创建Tesseract对象
             ITesseract tesseract = new Tesseract();
-            
-            // 设置字体库路径
-            tesseract.setDatapath("D:\\front");
-            
+
+            // Tesseract data directory (absolute path).
+            tesseract.setDatapath("D:\\tessdata");
+
+/*            File tessdataDir = exportTessdataToTemp(); // copy tessdata from the classpath to a temp dir
+            tesseract.setDatapath(tessdataDir.getAbsolutePath());*/
+
             // 设置语言简体中文
             tesseract.setLanguage("chi_sim");
-            
+
             // 优化OCR配置
             tesseract.setPageSegMode(6); // 假设统一的文本块
             tesseract.setOcrEngineMode(3); // 使用默认引擎
-            
+
             // 执行OCR识别图片
-            String result = tesseract.doOCR(file);
-            
+            String result = tesseract.doOCR(bufferedImage);
+
             System.out.println("\n=== 原始识别结果 ===");
             System.out.println(result);
-            
+
             // 清理和格式化结果
             String cleanedResult = cleanOcrResult(result);
-            
+
             System.out.println("\n=== 清理后的结果 ===");
             System.out.println(cleanedResult);
-            
+
             // 分行显示结果
             System.out.println("\n=== 分行显示结果 ===");
             String[] lines = cleanedResult.split("-");
@@ -62,7 +80,7 @@ public class Application {
                     System.out.println((i + 1) + ". " + lines[i].trim());
                 }
             }
-            
+
         } catch (TesseractException e) {
             System.err.println("Tesseract OCR错误: " + e.getMessage());
             e.printStackTrace();
@@ -71,7 +89,7 @@ public class Application {
             e.printStackTrace();
         }
     }
-    
+
     /**
      * 清理OCR识别结果
      * @param result 原始识别结果
@@ -81,7 +99,7 @@ public class Application {
         if (result == null || result.trim().isEmpty()) {
             return "";
         }
-        
+
         // 替换常见的OCR错误
         String cleaned = result
                 // 替换回车和换行
@@ -117,10 +135,10 @@ public class Application {
                 .replaceAll("调教", "调整")
                 // 移除多余的空格
                 .trim();
-        
+
         return cleaned;
     }
-    
+
     /**
      * 图片预处理(可选)
      * @param inputFile 输入图片
@@ -129,22 +147,71 @@ public class Application {
     private static void preprocessImage(File inputFile, File outputFile) {
         try {
             BufferedImage image = ImageIO.read(inputFile);
-            
+
             // 转换为灰度图
             BufferedImage grayImage = new BufferedImage(
                 image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
-            
+
             Graphics2D g2d = grayImage.createGraphics();
             g2d.drawImage(image, 0, 0, null);
             g2d.dispose();
-            
+
             // 保存预处理后的图片
             ImageIO.write(grayImage, "png", outputFile);
-            
+
             System.out.println("图片预处理完成: " + outputFile.getAbsolutePath());
-            
+
         } catch (IOException e) {
             System.err.println("图片预处理失败: " + e.getMessage());
         }
     }
+
+    /**
+     * Converts an image to 8-bit grayscale.
+     *
+     * @param image source image
+     * @return a new {@code TYPE_BYTE_GRAY} image with the same dimensions
+     */
+    private static BufferedImage preprocessImage(BufferedImage image) {
+        BufferedImage grayImage = new BufferedImage(
+                image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
+
+        Graphics2D g2d = grayImage.createGraphics();
+        g2d.drawImage(image, 0, 0, null);
+        g2d.dispose();
+
+        return grayImage;
+    }
+
+    /**
+     * Copies the bundled language files from the classpath into
+     * {@code <java.io.tmpdir>/tessdata}, overwriting any existing copies.
+     *
+     * @return the {@code tessdata} directory itself
+     * @throws FileNotFoundException if a language file is missing from the classpath
+     * @throws IOException if the directory cannot be created or a copy fails
+     */
+    public static File exportTessdataToTemp() throws IOException {
+        String[] languageFiles = {"chi_sim.traineddata","chi_sim_vert.traineddata"};
+        File tempTessdataDir = new File(System.getProperty("java.io.tmpdir"), "tessdata");
+        // Unlike File.mkdirs(), createDirectories reports failure with an exception
+        // and is a no-op when the directory already exists.
+        Files.createDirectories(tempTessdataDir.toPath());
+
+        ClassLoader classLoader = Application.class.getClassLoader();
+        for (String filename : languageFiles) {
+            // ClassLoader resource names always use '/' and have no leading slash;
+            // backslashes are never translated, so "\\tessdata\\..." finds nothing.
+            String resourceName = "tessdata/" + filename;
+            try (InputStream in = classLoader.getResourceAsStream(resourceName)) {
+                if (in == null) {
+                    throw new FileNotFoundException("语言包未找到: /tessdata/" + filename);
+                }
+                File outFile = new File(tempTessdataDir, filename);
+                Files.copy(in, outFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+            }
+        }
+        return tempTessdataDir;
+    }
+
 }
diff --git a/klp-wms/src/main/resources/tessdata/chi_sim.traineddata b/klp-wms/src/main/resources/tessdata/chi_sim.traineddata
new file mode 100644
index 00000000..da7fa49d
Binary files /dev/null and b/klp-wms/src/main/resources/tessdata/chi_sim.traineddata differ
diff --git a/klp-wms/src/main/resources/tessdata/chi_sim_vert.traineddata b/klp-wms/src/main/resources/tessdata/chi_sim_vert.traineddata
new file mode 100644
index 00000000..851996ca
Binary files /dev/null and b/klp-wms/src/main/resources/tessdata/chi_sim_vert.traineddata differ