feat: OCR接口
This commit is contained in:
@@ -7,5 +7,5 @@ public interface ITesseractOcrService {
|
||||
* @param imageUrl 图片URL
|
||||
* @return 识别结果
|
||||
*/
|
||||
String recognizeText(String imageUrl) throws Exception;
|
||||
String recognizeText(String imageUrl);
|
||||
}
|
||||
|
||||
@@ -4,12 +4,14 @@ import com.klp.common.config.TesseractConfig;
|
||||
import com.klp.service.ITesseractOcrService;
|
||||
import net.sourceforge.tess4j.ITesseract;
|
||||
import net.sourceforge.tess4j.Tesseract;
|
||||
import net.sourceforge.tess4j.TesseractException;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import javax.imageio.ImageIO;
|
||||
import java.awt.*;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
|
||||
@@ -19,12 +21,17 @@ public class TesseractOcrServiceImpl implements ITesseractOcrService {
|
||||
@Resource
|
||||
private TesseractConfig tesseractConfig;
|
||||
@Override
|
||||
public String recognizeText(String imageUrl) throws Exception {
|
||||
public String recognizeText(String imageUrl){
|
||||
// 读取网络图片为 BufferedImage
|
||||
URL url = new URL(imageUrl);
|
||||
InputStream inputStream = url.openStream();
|
||||
BufferedImage image = ImageIO.read(inputStream);
|
||||
inputStream.close();
|
||||
BufferedImage image = null;
|
||||
try {
|
||||
URL url = new URL(imageUrl);
|
||||
InputStream inputStream = url.openStream();
|
||||
image = ImageIO.read(inputStream);
|
||||
inputStream.close();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
// 预处理图片
|
||||
BufferedImage bufferedImage = preprocessImage(image);
|
||||
System.out.println("开始OCR识别...");
|
||||
@@ -38,7 +45,12 @@ public class TesseractOcrServiceImpl implements ITesseractOcrService {
|
||||
tesseract.setPageSegMode(6); // 假设统一的文本块
|
||||
tesseract.setOcrEngineMode(3); // 使用默认引擎
|
||||
// 执行OCR识别图片
|
||||
String result = tesseract.doOCR(bufferedImage);
|
||||
String result = null;
|
||||
try {
|
||||
result = tesseract.doOCR(bufferedImage);
|
||||
} catch (TesseractException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
// 清理和格式化结果
|
||||
String cleanedResult = cleanOcrResult(result);
|
||||
|
||||
Reference in New Issue
Block a user