feat: OCR接口

This commit is contained in:
JR
2025-08-02 10:38:28 +08:00
parent 16c8bc593a
commit 044a2269af
3 changed files with 33 additions and 7 deletions

View File

@@ -7,5 +7,5 @@ public interface ITesseractOcrService {
* @param imageUrl 图片URL
* @return 识别结果
*/
String recognizeText(String imageUrl) throws Exception;
String recognizeText(String imageUrl);
}

View File

@@ -4,12 +4,14 @@ import com.klp.common.config.TesseractConfig;
import com.klp.service.ITesseractOcrService;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
@@ -19,12 +21,17 @@ public class TesseractOcrServiceImpl implements ITesseractOcrService {
@Resource
private TesseractConfig tesseractConfig;
@Override
public String recognizeText(String imageUrl) throws Exception {
public String recognizeText(String imageUrl){
// 读取网络图片为 BufferedImage
URL url = new URL(imageUrl);
InputStream inputStream = url.openStream();
BufferedImage image = ImageIO.read(inputStream);
inputStream.close();
BufferedImage image = null;
try {
URL url = new URL(imageUrl);
InputStream inputStream = url.openStream();
image = ImageIO.read(inputStream);
inputStream.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
// 预处理图片
BufferedImage bufferedImage = preprocessImage(image);
System.out.println("开始OCR识别...");
@@ -38,7 +45,12 @@ public class TesseractOcrServiceImpl implements ITesseractOcrService {
tesseract.setPageSegMode(6); // 假设统一的文本块
tesseract.setOcrEngineMode(3); // 使用默认引擎
// 执行OCR识别图片
String result = tesseract.doOCR(bufferedImage);
String result = null;
try {
result = tesseract.doOCR(bufferedImage);
} catch (TesseractException e) {
throw new RuntimeException(e);
}
// 清理和格式化结果
String cleanedResult = cleanOcrResult(result);