/*
 * File: klp-oa/klp-wms/src/main/java/com/klp/service/impl/ImageRecognitionServiceImpl.java
 * Listing details: 569 lines, 23 KiB, Java (Raw / Normal View / History).
 * First revision timestamp shown in the listing: 2025-08-02 14:46:02 +08:00
 */
package com.klp.service.impl;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.klp.config.ImageRecognitionConfig;
import com.klp.domain.bo.ImageRecognitionBo;
import com.klp.domain.vo.ImageRecognitionVo;
import com.klp.service.IImageRecognitionService;
import com.klp.utils.ImageProcessingUtils;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.http.*;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
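/**
 * Image recognition service implementation.
 *
 * <p>Converts an image URL to a data URI, posts it together with a text prompt to the
 * configured AI endpoint through a {@link RestTemplate}, and maps the reply onto
 * {@link ImageRecognitionVo}.
 *
 * <p>NOTE(review): batch recognition and voting rounds share one fixed pool of five
 * threads, and no shutdown of that pool is visible in this file - confirm it is
 * released when the bean is destroyed.
 *
 * <p>NOTE(review): the {@code @Qualifier} on the {@code restTemplate} field presumably
 * only reaches the Lombok-generated constructor if {@code lombok.config} lists it under
 * {@code lombok.copyableAnnotations} - TODO confirm.
 *
 * @author klp
 * @date 2025-01-27
 */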
@Slf4j
@RequiredArgsConstructor
@Service
public class ImageRecognitionServiceImpl implements IImageRecognitionService {
private final ImageRecognitionConfig config;
private final ImageProcessingUtils imageProcessingUtils;
@Qualifier("salesScriptRestTemplate")
private final RestTemplate restTemplate;
private final ObjectMapper objectMapper = new ObjectMapper();
private final ExecutorService executorService = Executors.newFixedThreadPool(5);
/**
 * Recognizes a single image and never throws: every failure is reported through the
 * returned object's status and error message.
 *
 * <p>The recognition type {@code "bom"} and {@code "text"} select the dedicated
 * recognizers; any other value, including {@code null}, falls back to general
 * recognition.
 *
 * @param bo the recognition request; a {@code null} request yields a failed result
 * @return the recognition result with status {@code "success"} or {@code "failed"} and
 *         the elapsed processing time in milliseconds
 */
@Override
public ImageRecognitionVo recognizeImage(ImageRecognitionBo bo) {
    long startTime = System.currentTimeMillis();
    ImageRecognitionVo result = new ImageRecognitionVo();
    try {
        if (bo == null) {
            throw new IllegalArgumentException("识别请求不能为空");
        }
        // Validate the image URL before spending an AI call on it.
        if (!imageProcessingUtils.isValidImageUrl(bo.getImageUrl())) {
            throw new RuntimeException("无效的图片URL");
        }
        // A switch on a null String throws NullPointerException, so compare explicitly
        // and let a missing type take the same path as an unknown one.
        String type = bo.getRecognitionType();
        if ("bom".equals(type)) {
            result = recognizeBom(bo);
        } else if ("text".equals(type)) {
            result = recognizeText(bo);
        } else {
            result = recognizeGeneral(bo);
        }
        result.setStatus("success");
        result.setProcessingTime(System.currentTimeMillis() - startTime);
    } catch (Exception e) {
        log.error("图片识别失败", e);
        if (bo != null) {
            // Keep the URL so callers (e.g. batch processing) can tell which image failed.
            result.setImageUrl(bo.getImageUrl());
        }
        result.setStatus("failed");
        // Some exceptions (e.g. NullPointerException) carry no message; report the type instead.
        String message = e.getMessage();
        result.setErrorMessage(message != null ? message : e.getClass().getSimpleName());
        result.setProcessingTime(System.currentTimeMillis() - startTime);
    }
    return result;
}
/**
 * Recognizes several images concurrently on the shared executor.
 *
 * <p>The returned list has one entry per request, in request order. A request whose
 * task cannot be awaited is represented by a failed placeholder carrying the request's
 * image URL.
 *
 * @param boList the recognition requests; {@code null} or empty yields an empty list
 * @return one result per request, in the same order
 */
@Override
public List<ImageRecognitionVo> recognizeImages(List<ImageRecognitionBo> boList) {
    if (boList == null || boList.isEmpty()) {
        return new ArrayList<>();
    }
    // Snapshot the requests so results can be matched to them by index.
    List<ImageRecognitionBo> requests = new ArrayList<>(boList);
    List<CompletableFuture<ImageRecognitionVo>> futures = new ArrayList<>(requests.size());
    for (ImageRecognitionBo bo : requests) {
        futures.add(CompletableFuture.supplyAsync(() -> recognizeImage(bo), executorService));
    }
    List<ImageRecognitionVo> results = new ArrayList<>(requests.size());
    for (int i = 0; i < futures.size(); i++) {
        try {
            results.add(futures.get(i).get());
        } catch (InterruptedException e) {
            // Restore the flag so the caller can still observe the interruption.
            Thread.currentThread().interrupt();
            log.error("批量识别失败", e);
            results.add(failedBatchResult(requests.get(i), e));
        } catch (Exception e) {
            log.error("批量识别失败", e);
            results.add(failedBatchResult(requests.get(i), e));
        }
    }
    return results;
}

/** Builds the failed placeholder for a batch entry whose task could not be awaited. */
private ImageRecognitionVo failedBatchResult(ImageRecognitionBo bo, Exception cause) {
    ImageRecognitionVo errorResult = new ImageRecognitionVo();
    if (bo != null) {
        errorResult.setImageUrl(bo.getImageUrl());
    }
    errorResult.setStatus("failed");
    String message = cause.getMessage();
    errorResult.setErrorMessage(message != null ? message : cause.getClass().getSimpleName());
    return errorResult;
}
@Override
public ImageRecognitionVo recognizeBom(ImageRecognitionBo bo) {
String prompt = buildBomPrompt(bo);
String aiResponse = callAiApi(bo.getImageUrl(), prompt, bo.getEnableVoting(), bo.getVotingRounds());
ImageRecognitionVo result = new ImageRecognitionVo();
result.setImageUrl(bo.getImageUrl());
result.setRecognitionType("bom");
result.setRecognizedText(aiResponse);
2025-08-02 15:13:09 +08:00
// 解析识别结果
2025-08-02 14:46:02 +08:00
try {
Map<String, Object> structuredResult = parseBomResponse(aiResponse);
result.setStructuredResult(structuredResult);
// 提取BOM项目列表
List<ImageRecognitionVo.BomItemVo> bomItems = extractBomItems(structuredResult);
result.setBomItems(bomItems);
2025-08-02 15:13:09 +08:00
// 提取属性列表
List<ImageRecognitionVo.AttributeVo> attributes = extractAttributes(structuredResult);
result.setAttributes(attributes);
2025-08-02 14:46:02 +08:00
} catch (Exception e) {
2025-08-02 15:13:09 +08:00
log.warn("解析识别响应失败: {}", e.getMessage());
2025-08-02 14:46:02 +08:00
result.setRecognizedText(aiResponse);
}
return result;
}
/**
 * Recognizes the text in an image.
 *
 * @param bo the recognition request
 * @return the result with recognition type {@code "text"} and the AI reply as the
 *         recognized text
 */
@Override
public ImageRecognitionVo recognizeText(ImageRecognitionBo bo) {
    String textPrompt = buildTextPrompt(bo);
    String reply = callAiApi(
            bo.getImageUrl(),
            textPrompt,
            bo.getEnableVoting(),
            bo.getVotingRounds());

    ImageRecognitionVo vo = new ImageRecognitionVo();
    vo.setImageUrl(bo.getImageUrl());
    vo.setRecognitionType("text");
    vo.setRecognizedText(reply);
    return vo;
}
/**
 * Sends a minimal chat request to the configured AI endpoint to check connectivity.
 *
 * @return a map with {@code success} (Boolean) and {@code message}; on success it also
 *         holds the endpoint's reply body under {@code response}
 */
@Override
public Map<String, Object> testAiConnection() {
    Map<String, Object> outcome = new HashMap<>();
    try {
        // A single short user message, capped at 10 tokens.
        Map<String, String> greeting = new HashMap<>();
        greeting.put("role", "user");
        greeting.put("content", "你好");

        Map<String, Object> payload = new HashMap<>();
        payload.put("model", config.getModelName());
        payload.put("messages", Collections.singletonList(greeting));
        payload.put("max_tokens", 10);

        HttpHeaders httpHeaders = new HttpHeaders();
        httpHeaders.setContentType(MediaType.APPLICATION_JSON);
        httpHeaders.setBearerAuth(config.getApiKey());

        ResponseEntity<Map> reply = restTemplate.postForEntity(
                config.getApiUrl(), new HttpEntity<>(payload, httpHeaders), Map.class);

        outcome.put("success", true);
        outcome.put("message", "AI连接测试成功");
        outcome.put("response", reply.getBody());
    } catch (Exception e) {
        log.error("AI连接测试失败", e);
        outcome.put("success", false);
        outcome.put("message", "AI连接测试失败: " + e.getMessage());
    }
    return outcome;
}
/**
 * Returns the current recognition settings with the API key masked.
 *
 * @return a map with the keys {@code apiUrl}, {@code modelName}, {@code apiKey}
 *         (masked), {@code maxRetries}, {@code temperature}, {@code maxTokens},
 *         {@code maxImageDimension} and {@code imageQuality}
 */
@Override
public Map<String, Object> getRecognitionConfig() {
    Map<String, Object> configMap = new HashMap<>();
    configMap.put("apiUrl", config.getApiUrl());
    configMap.put("modelName", config.getModelName());
    configMap.put("apiKey", maskApiKey(config.getApiKey()));
    configMap.put("maxRetries", config.getMaxRetries());
    configMap.put("temperature", config.getTemperature());
    configMap.put("maxTokens", config.getMaxTokens());
    configMap.put("maxImageDimension", config.getMaxImageDimension());
    configMap.put("imageQuality", config.getImageQuality());
    return configMap;
}

/**
 * Masks an API key for display: keeps a short prefix followed by {@code "..."}.
 *
 * <p>At most 10 characters are shown, and never more than half of the key, so a short
 * key is not revealed in full. A {@code null} or empty key yields an empty string
 * instead of throwing.
 */
private static String maskApiKey(String apiKey) {
    if (apiKey == null || apiKey.isEmpty()) {
        return "";
    }
    int visible = Math.min(10, apiKey.length() / 2);
    return apiKey.substring(0, visible) + "...";
}
/**
 * Placeholder for updating the recognition settings.
 *
 * <p>Currently only logs the supplied values; nothing is applied or stored.
 * NOTE(review): the map is logged as-is - confirm it can never contain secrets.
 *
 * @param config the requested settings (this parameter shadows the {@code config} field)
 */
@Override
public void updateRecognitionConfig(Map<String, Object> config) {
    // The actual update logic can be implemented here.
    log.info("更新识别配置: {}", config);
}
/**
 * Returns the recognition history page.
 *
 * <p>Currently a stub: the query is ignored and an empty page is returned.
 *
 * @param pageQuery paging parameters (unused)
 * @return a map with an empty {@code rows} list and a {@code total} of {@code 0L}
 */
@Override
public Map<String, Object> getRecognitionHistory(Map<String, Object> pageQuery) {
    Map<String, Object> page = new HashMap<>();
    page.put("total", Long.valueOf(0L));
    page.put("rows", new ArrayList<>());
    return page;
}
/**
 * General-purpose recognition, used when the request names neither the BOM nor the
 * text recognizer.
 *
 * @param bo the recognition request
 * @return the result with recognition type {@code "general"} and the AI reply as the
 *         recognized text
 */
private ImageRecognitionVo recognizeGeneral(ImageRecognitionBo bo) {
    String generalPrompt = buildGeneralPrompt(bo);
    String reply = callAiApi(
            bo.getImageUrl(),
            generalPrompt,
            bo.getEnableVoting(),
            bo.getVotingRounds());

    ImageRecognitionVo vo = new ImageRecognitionVo();
    vo.setImageUrl(bo.getImageUrl());
    vo.setRecognitionType("general");
    vo.setRecognizedText(reply);
    return vo;
}
/**
 * Calls the AI API with one image and one text prompt.
 *
 * <p>The image is converted to a data URI and sent together with the prompt as a single
 * user message. When voting is enabled and more than one round is requested, the request
 * is issued through {@code callAiApiWithVoting}; otherwise it is issued once.
 *
 * @param imageUrl     URL of the image to recognize
 * @param prompt       text prompt sent alongside the image
 * @param enableVoting whether multi-round voting is requested; {@code null} means no
 * @param votingRounds number of voting rounds; {@code null} is treated as a single round
 * @return the text content of the AI reply
 */
private String callAiApi(String imageUrl, String prompt, Boolean enableVoting, Integer votingRounds) {
    // Convert the image to a data URI.
    String dataUri = imageProcessingUtils.imageUrlToDataUri(
            imageUrl, config.getMaxImageDimension(), config.getImageQuality());
    // Build the request body.
    Map<String, Object> requestBody = new HashMap<>();
    requestBody.put("model", config.getModelName());
    List<Map<String, Object>> contents = new ArrayList<>();
    // Image part.
    Map<String, Object> imageContent = new HashMap<>();
    imageContent.put("type", "image_url");
    Map<String, Object> imageUrlObj = new HashMap<>();
    imageUrlObj.put("url", dataUri);
    imageUrlObj.put("detail", "low");
    imageContent.put("image_url", imageUrlObj);
    contents.add(imageContent);
    // Text part.
    Map<String, Object> textContent = new HashMap<>();
    textContent.put("type", "text");
    textContent.put("text", prompt);
    contents.add(textContent);
    Map<String, Object> message = new HashMap<>();
    message.put("role", "user");
    message.put("content", contents);
    requestBody.put("messages", Arrays.asList(message));
    requestBody.put("enable_thinking", true);
    requestBody.put("temperature", config.getTemperature());
    requestBody.put("top_p", 0.7);
    requestBody.put("min_p", 0.05);
    requestBody.put("frequency_penalty", 0.2);
    // The key is "max_tokens", matching the request built in testAiConnection; the
    // previous "max_token" spelling differed from it, so the limit was presumably ignored.
    requestBody.put("max_tokens", config.getMaxTokens());
    requestBody.put("stream", false);
    requestBody.put("stop", new ArrayList<>());
    Map<String, String> responseFormat = new HashMap<>();
    responseFormat.put("type", "text");
    requestBody.put("response_format", responseFormat);
    // Multi-round voting. A null round count must not be unboxed, so default it to 1.
    int rounds = votingRounds == null ? 1 : votingRounds;
    if (Boolean.TRUE.equals(enableVoting) && rounds > 1) {
        return callAiApiWithVoting(requestBody, rounds);
    }
    return callAiApiSingle(requestBody);
}
/**
 * Issues one AI API request, retrying up to the configured maximum number of attempts.
 *
 * <p>An attempt is retried when it throws, when the reply is not HTTP 200 with a body,
 * or when the body has no choices. The content of the first choice's message is
 * returned as soon as one attempt provides it.
 *
 * @param requestBody the request payload
 * @return the message content of the first choice
 * @throws RuntimeException if the last attempt throws (the cause is attached) or if all
 *                          attempts finish without a usable reply
 */
private String callAiApiSingle(Map<String, Object> requestBody) {
    for (int attempt = 0; attempt < config.getMaxRetries(); attempt++) {
        try {
            HttpHeaders httpHeaders = new HttpHeaders();
            httpHeaders.setContentType(MediaType.APPLICATION_JSON);
            httpHeaders.setBearerAuth(config.getApiKey());

            ResponseEntity<Map> reply = restTemplate.postForEntity(
                    config.getApiUrl(), new HttpEntity<>(requestBody, httpHeaders), Map.class);

            // Anything other than "200 with a body" counts as an unusable attempt.
            if (reply.getStatusCode() != HttpStatus.OK || reply.getBody() == null) {
                continue;
            }
            Map<String, Object> payload = reply.getBody();
            List<Map<String, Object>> choiceList = (List<Map<String, Object>>) payload.get("choices");
            if (choiceList == null || choiceList.isEmpty()) {
                continue;
            }
            Map<String, Object> firstChoice = choiceList.get(0);
            Map<String, Object> replyMessage = (Map<String, Object>) firstChoice.get("message");
            return (String) replyMessage.get("content");
        } catch (Exception e) {
            log.error("AI API调用失败重试 {}: {}", attempt + 1, e.getMessage());
            boolean lastAttempt = attempt == config.getMaxRetries() - 1;
            if (lastAttempt) {
                throw new RuntimeException("AI API调用失败", e);
            }
        }
    }
    throw new RuntimeException("AI API调用失败已达到最大重试次数");
}
/**
 * Issues the same AI API request for several rounds and returns the first successful reply.
 *
 * <p>Rounds are queued on the shared executor, and the calling thread also runs any
 * round that no worker has started yet. This matters because this method can itself run
 * on a worker of that same fixed-size pool (batch recognition submits its tasks there):
 * if every worker only waited for queued rounds, none of the rounds could ever start.
 * A {@link FutureTask} runs at most once, so a round executed by the caller becomes a
 * no-op when a worker later dequeues it.
 *
 * @param requestBody the request payload, shared read-only by all rounds
 * @param rounds      the number of rounds to issue
 * @return the reply of the first round (in submission order) that succeeded
 * @throws RuntimeException if no round succeeded
 */
private String callAiApiWithVoting(Map<String, Object> requestBody, int rounds) {
    List<FutureTask<String>> tasks = new ArrayList<>();
    for (int i = 0; i < rounds; i++) {
        FutureTask<String> task = new FutureTask<>(() -> callAiApiSingle(requestBody));
        tasks.add(task);
        executorService.execute(task);
    }
    // Help out: run whatever has not been picked up yet. run() returns immediately for a
    // task that is already running or finished, and records any failure inside the task.
    for (FutureTask<String> task : tasks) {
        task.run();
    }
    List<String> results = new ArrayList<>();
    for (FutureTask<String> task : tasks) {
        try {
            results.add(task.get());
        } catch (InterruptedException e) {
            // Restore the flag for the caller and stop waiting for the remaining rounds.
            Thread.currentThread().interrupt();
            log.error("投票轮次失败: {}", e.getMessage());
            break;
        } catch (Exception e) {
            log.error("投票轮次失败: {}", e.getMessage());
        }
    }
    if (results.isEmpty()) {
        throw new RuntimeException("所有投票轮次都失败了");
    }
    // Simple voting: return the first successful result.
    return results.get(0);
}
/**
* 构建BOM识别提示词
*/
private String buildBomPrompt(ImageRecognitionBo bo) {
StringBuilder prompt = new StringBuilder();
2025-08-02 15:13:09 +08:00
prompt.append("请仔细分析这张图片中的内容,并提取所有相关信息:\n\n");
2025-08-02 14:46:02 +08:00
prompt.append("【识别要求】\n");
2025-08-02 15:13:09 +08:00
prompt.append("1. 如果图片包含BOM物料清单信息请提取原材料ID、名称、数量、单位等\n");
prompt.append("2. 如果图片包含其他类型的信息(如检验报告、产品信息等),请提取所有相关属性\n");
prompt.append("3. 识别图片中的所有文字信息,包括标题、内容、表格数据等\n");
prompt.append("4. 提取关键信息点,如产品名称、规格参数、检验结果等\n");
prompt.append("5. 确保信息的准确性和完整性\n\n");
2025-08-02 14:46:02 +08:00
if (bo.getProductId() != null) {
prompt.append("【产品信息】\n");
prompt.append("产品ID: ").append(bo.getProductId()).append("\n\n");
}
if (bo.getCustomPrompt() != null && !bo.getCustomPrompt().isEmpty()) {
prompt.append("【自定义要求】\n");
prompt.append(bo.getCustomPrompt()).append("\n\n");
}
prompt.append("【输出格式】\n");
prompt.append("请按以下JSON格式输出识别结果\n");
prompt.append("{\n");
prompt.append(" \"bomItems\": [\n");
prompt.append(" {\n");
prompt.append(" \"rawMaterialId\": \"原材料ID\",\n");
prompt.append(" \"rawMaterialName\": \"原材料名称\",\n");
prompt.append(" \"quantity\": 数量,\n");
prompt.append(" \"unit\": \"单位\",\n");
prompt.append(" \"specification\": \"规格\",\n");
prompt.append(" \"remark\": \"备注\"\n");
prompt.append(" }\n");
prompt.append(" ],\n");
2025-08-02 15:13:09 +08:00
prompt.append(" \"attributes\": [\n");
prompt.append(" {\n");
prompt.append(" \"attrKey\": \"属性名称\",\n");
prompt.append(" \"attrValue\": \"属性值\"\n");
prompt.append(" }\n");
prompt.append(" ],\n");
prompt.append(" \"summary\": \"内容总结\",\n");
2025-08-02 14:46:02 +08:00
prompt.append(" \"totalItems\": 总项目数\n");
prompt.append("}\n\n");
2025-08-02 15:13:09 +08:00
prompt.append("请将识别到的所有信息整理成属性数组每个属性包含attrKey属性名称和attrValue属性值");
2025-08-02 14:46:02 +08:00
return prompt.toString();
}
/**
 * Builds the prompt for text recognition.
 *
 * <p>Fixed requirements come first, followed by the request's custom prompt when it is
 * non-empty, and finally the output-format instruction.
 *
 * @param bo the recognition request
 * @return the complete prompt text
 */
private String buildTextPrompt(ImageRecognitionBo bo) {
    String requirements = "请识别图片中的所有文字内容,包括但不限于:\n\n"
            + "【识别要求】\n"
            + "1. 识别图片中的所有可见文字\n"
            + "2. 保持文字的原始格式和顺序\n"
            + "3. 识别表格、列表等结构化内容\n"
            + "4. 识别数字、符号等特殊字符\n"
            + "5. 保持段落和换行格式\n\n";

    String custom = bo.getCustomPrompt();
    boolean hasCustom = custom != null && !custom.isEmpty();
    String customSection = hasCustom ? "【自定义要求】\n" + custom + "\n\n" : "";

    String outputFormat = "【输出格式】\n"
            + "请直接输出识别到的文字内容,保持原有格式。";

    return requirements + customSection + outputFormat;
}
/**
 * Builds the prompt for general image analysis.
 *
 * <p>Fixed analysis requirements come first, followed by the request's custom prompt
 * when it is non-empty, and finally the output-format instruction.
 *
 * @param bo the recognition request
 * @return the complete prompt text
 */
private String buildGeneralPrompt(ImageRecognitionBo bo) {
    List<String> parts = new ArrayList<>();
    parts.add("请分析这张图片的内容,并提供详细描述:\n\n");
    parts.add("【分析要求】\n");
    parts.add("1. 描述图片的主要内容和主题\n");
    parts.add("2. 识别图片中的文字信息\n");
    parts.add("3. 分析图片的结构和布局\n");
    parts.add("4. 提取关键信息和数据\n");
    parts.add("5. 识别图片中的表格、图表等结构化内容\n\n");

    String custom = bo.getCustomPrompt();
    if (custom != null && !custom.isEmpty()) {
        parts.add("【自定义要求】\n");
        parts.add(custom);
        parts.add("\n\n");
    }

    parts.add("【输出格式】\n");
    parts.add("请提供详细的分析结果,包括文字内容、结构分析等。");
    return String.join("", parts);
}
/**
 * Parses the AI reply of a BOM recognition into a map.
 *
 * <p>The reply is first parsed as JSON as a whole. If that fails, the span from the
 * first <code>{</code> to the last <code>}</code> is parsed instead, which covers
 * replies that wrap the JSON object in extra text. If no JSON object can be obtained
 * (including a {@code null} reply or the JSON literal {@code null}), a map holding the
 * original reply under {@code rawText} is returned.
 *
 * @param response the AI reply; may be {@code null}
 * @return the parsed JSON object, or a fallback map with the single key {@code rawText};
 *         never {@code null}
 */
@SuppressWarnings("unchecked")
private Map<String, Object> parseBomResponse(String response) {
    if (response == null) {
        // readValue rejects a null input with a non-JSON exception; go straight to the fallback.
        return rawTextFallback(null);
    }
    try {
        // Try the whole reply first.
        Map<String, Object> parsed = objectMapper.readValue(response, Map.class);
        // The JSON literal "null" deserializes to a null map.
        return parsed != null ? parsed : rawTextFallback(response);
    } catch (JsonProcessingException ignored) {
        // Not pure JSON; fall through and look for an embedded JSON object.
    }
    // Same span as a greedy "\{[\s\S]*\}" match: first '{' through the last '}'.
    int start = response.indexOf('{');
    int end = response.lastIndexOf('}');
    if (start >= 0 && end > start) {
        try {
            Map<String, Object> parsed = objectMapper.readValue(response.substring(start, end + 1), Map.class);
            if (parsed != null) {
                return parsed;
            }
        } catch (JsonProcessingException ex) {
            log.warn("无法解析BOM响应为JSON: {}", response);
        }
    }
    return rawTextFallback(response);
}

/** Wraps an unparseable reply in a map under the key {@code rawText}. */
private static Map<String, Object> rawTextFallback(String response) {
    Map<String, Object> fallback = new HashMap<>();
    fallback.put("rawText", response);
    return fallback;
}
/**
 * Extracts the BOM item list from the parsed recognition result.
 *
 * <p>Reads the {@code bomItems} entry and maps each JSON object in it to a
 * {@code BomItemVo}. Because the data comes from a model reply, values are converted
 * leniently: non-string scalars (e.g. a numeric ID) are turned into their string form
 * instead of aborting the extraction, and a quantity given as a numeric string is
 * parsed. Entries that are not JSON objects are skipped.
 *
 * @param structuredResult the parsed reply; may be {@code null}
 * @return the extracted items; empty when there are none or extraction fails
 */
private List<ImageRecognitionVo.BomItemVo> extractBomItems(Map<String, Object> structuredResult) {
    List<ImageRecognitionVo.BomItemVo> bomItems = new ArrayList<>();
    if (structuredResult == null) {
        return bomItems;
    }
    try {
        Object rawItems = structuredResult.get("bomItems");
        if (!(rawItems instanceof List)) {
            return bomItems;
        }
        for (Object rawItem : (List<?>) rawItems) {
            if (!(rawItem instanceof Map)) {
                continue;
            }
            Map<?, ?> item = (Map<?, ?>) rawItem;
            ImageRecognitionVo.BomItemVo bomItem = new ImageRecognitionVo.BomItemVo();
            bomItem.setRawMaterialId(bomFieldAsString(item.get("rawMaterialId")));
            bomItem.setRawMaterialName(bomFieldAsString(item.get("rawMaterialName")));
            Object quantity = item.get("quantity");
            if (quantity instanceof Number) {
                bomItem.setQuantity(((Number) quantity).doubleValue());
            } else if (quantity instanceof String) {
                try {
                    bomItem.setQuantity(Double.parseDouble(((String) quantity).trim()));
                } catch (NumberFormatException ignored) {
                    // Not a plain number (e.g. "about 3"); leave the quantity unset.
                }
            }
            bomItem.setUnit(bomFieldAsString(item.get("unit")));
            bomItem.setSpecification(bomFieldAsString(item.get("specification")));
            bomItem.setRemark(bomFieldAsString(item.get("remark")));
            bomItems.add(bomItem);
        }
    } catch (Exception e) {
        // Best effort: keep whatever was extracted before the failure.
        log.warn("提取BOM项目失败: {}", e.getMessage());
    }
    return bomItems;
}

/** Returns the string form of a BOM field value, or {@code null} when the value is absent. */
private static String bomFieldAsString(Object value) {
    return value == null ? null : String.valueOf(value);
}
2025-08-02 15:13:09 +08:00
/**
* 提取属性列表
*/
private List<ImageRecognitionVo.AttributeVo> extractAttributes(Map<String, Object> structuredResult) {
List<ImageRecognitionVo.AttributeVo> attributes = new ArrayList<>();
try {
List<Map<String, Object>> attrList = (List<Map<String, Object>>) structuredResult.get("attributes");
if (attrList != null) {
for (Map<String, Object> attr : attrList) {
ImageRecognitionVo.AttributeVo attribute = new ImageRecognitionVo.AttributeVo();
attribute.setAttrKey((String) attr.get("attrKey"));
attribute.setAttrValue((String) attr.get("attrValue"));
attributes.add(attribute);
}
}
// 如果没有attributes字段尝试从其他字段生成属性
if (attributes.isEmpty()) {
attributes = generateAttributesFromResult(structuredResult);
}
} catch (Exception e) {
log.warn("提取属性失败: {}", e.getMessage());
}
return attributes;
}
/**
* 从识别结果生成属性列表
*/
private List<ImageRecognitionVo.AttributeVo> generateAttributesFromResult(Map<String, Object> structuredResult) {
List<ImageRecognitionVo.AttributeVo> attributes = new ArrayList<>();
try {
// 提取summary作为内容总结
String summary = (String) structuredResult.get("summary");
if (summary != null && !summary.isEmpty()) {
ImageRecognitionVo.AttributeVo summaryAttr = new ImageRecognitionVo.AttributeVo();
summaryAttr.setAttrKey("内容总结");
summaryAttr.setAttrValue(summary);
attributes.add(summaryAttr);
}
// 提取totalItems
Object totalItems = structuredResult.get("totalItems");
if (totalItems != null) {
ImageRecognitionVo.AttributeVo totalAttr = new ImageRecognitionVo.AttributeVo();
totalAttr.setAttrKey("总项目数");
totalAttr.setAttrValue(String.valueOf(totalItems));
attributes.add(totalAttr);
}
// 提取bomItems数量
List<Map<String, Object>> bomItems = (List<Map<String, Object>>) structuredResult.get("bomItems");
if (bomItems != null && !bomItems.isEmpty()) {
ImageRecognitionVo.AttributeVo bomCountAttr = new ImageRecognitionVo.AttributeVo();
bomCountAttr.setAttrKey("BOM项目数");
bomCountAttr.setAttrValue(String.valueOf(bomItems.size()));
attributes.add(bomCountAttr);
}
} catch (Exception e) {
log.warn("生成属性失败: {}", e.getMessage());
}
return attributes;
}
2025-08-02 14:46:02 +08:00
}