From 43546d56a9d731c2654d6b0ba2a284222f6c4fd2 Mon Sep 17 00:00:00 2001 From: Joshi <3040996759@qq.com> Date: Sat, 2 Aug 2025 14:46:02 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=9B=BE=E7=89=87=E8=AF=86?= =?UTF-8?q?=E5=88=AB=E5=8D=83=E9=97=AE=E5=A4=A7=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../klp/config/ImageRecognitionConfig.java | 96 ++++ .../WmsImageRecognitionController.java | 140 +++++ .../com/klp/domain/bo/ImageRecognitionBo.java | 63 +++ .../com/klp/domain/vo/ImageRecognitionVo.java | 113 ++++ .../klp/service/IImageRecognitionService.java | 77 +++ .../impl/ImageRecognitionServiceImpl.java | 490 ++++++++++++++++++ .../com/klp/test/ImageRecognitionTest.java | 111 ++++ .../com/klp/utils/ImageProcessingUtils.java | 144 +++++ .../application-image-recognition.yml | 38 ++ 9 files changed, 1272 insertions(+) create mode 100644 klp-wms/src/main/java/com/klp/config/ImageRecognitionConfig.java create mode 100644 klp-wms/src/main/java/com/klp/controller/WmsImageRecognitionController.java create mode 100644 klp-wms/src/main/java/com/klp/domain/bo/ImageRecognitionBo.java create mode 100644 klp-wms/src/main/java/com/klp/domain/vo/ImageRecognitionVo.java create mode 100644 klp-wms/src/main/java/com/klp/service/IImageRecognitionService.java create mode 100644 klp-wms/src/main/java/com/klp/service/impl/ImageRecognitionServiceImpl.java create mode 100644 klp-wms/src/main/java/com/klp/test/ImageRecognitionTest.java create mode 100644 klp-wms/src/main/java/com/klp/utils/ImageProcessingUtils.java create mode 100644 klp-wms/src/main/resources/application-image-recognition.yml diff --git a/klp-wms/src/main/java/com/klp/config/ImageRecognitionConfig.java b/klp-wms/src/main/java/com/klp/config/ImageRecognitionConfig.java new file mode 100644 index 00000000..c4a5d967 --- /dev/null +++ b/klp-wms/src/main/java/com/klp/config/ImageRecognitionConfig.java @@ -0,0 +1,96 @@ +package com.klp.config; + 
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.context.annotation.Configuration;
+
+/**
+ * Settings for the image recognition feature, bound from properties under the
+ * {@code image.recognition} prefix.
+ *
+ * @author klp
+ * @date 2025-01-27
+ */
+@Configuration
+@ConfigurationProperties(prefix = "image.recognition")
+public class ImageRecognitionConfig {
+
+    /**
+     * AI API settings.
+     */
+    private String apiUrl = "https://api.siliconflow.cn/v1/chat/completions";
+    private String modelName = "Qwen/Qwen2.5-VL-72B-Instruct";
+    // Deliberately no built-in credential: a secret must not live in source control.
+    // Provide it through the image.recognition.api-key property (for example from an environment variable).
+    private String apiKey = "";
+    private Integer maxRetries = 3;
+    private Double temperature = 0.0;
+    private Integer maxTokens = 4096;
+
+    /**
+     * Image pre-processing settings.
+     */
+    private Integer maxImageDimension = 512;
+    private Integer imageQuality = 60;
+
+    // Getters and Setters
+    public String getApiUrl() {
+        return apiUrl;
+    }
+
+    public void setApiUrl(String apiUrl) {
+        this.apiUrl = apiUrl;
+    }
+
+    public String getModelName() {
+        return modelName;
+    }
+
+    public void setModelName(String modelName) {
+        this.modelName = modelName;
+    }
+
+    public String getApiKey() {
+        return apiKey;
+    }
+
+    public void setApiKey(String apiKey) {
+        this.apiKey = apiKey;
+    }
+
+    public Integer getMaxRetries() {
+        return maxRetries;
+    }
+
+    public void setMaxRetries(Integer maxRetries) {
+        this.maxRetries = maxRetries;
+    }
+
+    public Double getTemperature() {
+        return temperature;
+    }
+
+    public void setTemperature(Double temperature) {
+        this.temperature = temperature;
+    }
+
+    public Integer getMaxTokens() {
+        return maxTokens;
+    }
+
+    public void setMaxTokens(Integer maxTokens) {
+        this.maxTokens = maxTokens;
+    }
+
+    public Integer getMaxImageDimension() {
+        return maxImageDimension;
+    }
+
+    public void setMaxImageDimension(Integer maxImageDimension) {
+        this.maxImageDimension = maxImageDimension;
+    }
+
+    public Integer getImageQuality() {
+        return imageQuality;
+    }
+
+    public void setImageQuality(Integer imageQuality) {
+        this.imageQuality = imageQuality;
+    }
+}
\ No
newline at end of file
diff --git a/klp-wms/src/main/java/com/klp/controller/WmsImageRecognitionController.java b/klp-wms/src/main/java/com/klp/controller/WmsImageRecognitionController.java
new file mode 100644
index 00000000..a65bb356
--- /dev/null
+++ b/klp-wms/src/main/java/com/klp/controller/WmsImageRecognitionController.java
@@ -0,0 +1,140 @@
+package com.klp.controller;
+
+import com.klp.common.annotation.Log;
+import com.klp.common.core.controller.BaseController;
+import com.klp.common.core.domain.R;
+import com.klp.common.core.validate.AddGroup;
+import com.klp.common.core.validate.EditGroup;
+import com.klp.common.core.validate.QueryGroup;
+import com.klp.common.enums.BusinessType;
+import com.klp.common.utils.poi.ExcelUtil;
+import com.klp.domain.bo.ImageRecognitionBo;
+import com.klp.domain.vo.ImageRecognitionVo;
+import com.klp.service.IImageRecognitionService;
+import lombok.RequiredArgsConstructor;
+import org.springframework.validation.annotation.Validated;
+import org.springframework.web.bind.annotation.*;
+
+import javax.servlet.http.HttpServletResponse;
+import javax.validation.constraints.NotEmpty;
+import javax.validation.constraints.NotNull;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * REST endpoints for image recognition, mounted under {@code /wms/imageRecognition}.
+ * Every handler delegates to {@link IImageRecognitionService} and wraps the outcome in {@link R}.
+ *
+ * @author klp
+ * @date 2025-01-27
+ */
+@Validated
+@RequiredArgsConstructor
+@RestController
+@RequestMapping("/wms/imageRecognition")
+public class WmsImageRecognitionController extends BaseController {
+
+    private final IImageRecognitionService iImageRecognitionService;
+
+    /**
+     * Recognizes an image; the service dispatches on the request's recognition type.
+     */
+    @PostMapping("/recognize")
+    public R<ImageRecognitionVo> recognize(@RequestBody @Validated(AddGroup.class) ImageRecognitionBo bo) {
+        return R.ok(iImageRecognitionService.recognizeImage(bo));
+    }
+
+    /**
+     * Recognizes BOM content; the recognition type is forced to {@code "bom"}.
+     */
+    @PostMapping("/recognizeBom")
+    public R<ImageRecognitionVo> recognizeBom(@RequestBody @Validated(AddGroup.class) ImageRecognitionBo bo) {
+        bo.setRecognitionType("bom");
+        return R.ok(iImageRecognitionService.recognizeBom(bo));
+    }
+
+    /**
+     * Recognizes text content; the recognition type is forced to {@code "text"}.
+     */
+    @PostMapping("/recognizeText")
+    public R<ImageRecognitionVo> recognizeText(@RequestBody @Validated(AddGroup.class) ImageRecognitionBo bo) {
+        bo.setRecognitionType("text");
+        return R.ok(iImageRecognitionService.recognizeText(bo));
+    }
+
+    /**
+     * Recognizes a batch of images and returns one result per request entry.
+     */
+    @PostMapping("/recognizeBatch")
+    public R<List<ImageRecognitionVo>> recognizeBatch(@RequestBody @Validated(AddGroup.class) List<ImageRecognitionBo> boList) {
+        return R.ok(iImageRecognitionService.recognizeImages(boList));
+    }
+
+    /**
+     * Tests connectivity to the AI endpoint.
+     */
+    @PostMapping("/testConnection")
+    public R<Map<String, Object>> testConnection() {
+        return R.ok(iImageRecognitionService.testAiConnection());
+    }
+
+    /**
+     * Returns the current recognition configuration.
+     */
+    @GetMapping("/config")
+    public R<Map<String, Object>> getConfig() {
+        return R.ok(iImageRecognitionService.getRecognitionConfig());
+    }
+
+    /**
+     * Submits a configuration update to the service.
+     */
+    @PostMapping("/config")
+    public R<Void> updateConfig(@RequestBody Map<String, Object> config) {
+        iImageRecognitionService.updateRecognitionConfig(config);
+        return R.ok();
+    }
+
+    /**
+     * Returns the recognition history for the given paging query.
+     */
+    @PostMapping("/history")
+    public R<Map<String, Object>> getHistory(@RequestBody Map<String, Object> pageQuery) {
+        return R.ok(iImageRecognitionService.getRecognitionHistory(pageQuery));
+    }
+
+    /**
+     * Simple text recognition endpoint (compatible with the legacy request format):
+     * the body is a map carrying the image URL under the {@code imgUrl} key.
+     */
+    @PostMapping("/recognizeTextSimple")
+    public R<ImageRecognitionVo> recognizeTextSimple(@RequestBody Map<String, String> request) {
+        String imgUrl = request.get("imgUrl");
+        if (imgUrl == null || imgUrl.trim().isEmpty()) {
+            return R.fail("图片URL不能为空");
+        }
+
+        ImageRecognitionBo bo = new ImageRecognitionBo();
+        bo.setImageUrl(imgUrl);
+        bo.setRecognitionType("text");
+
+        ImageRecognitionVo result = iImageRecognitionService.recognizeText(bo);
+        return R.ok(result);
+    }
+
+    /**
+     * Simple BOM recognition endpoint (compatible with the legacy request format):
+     * the body is a map carrying the image URL under the {@code imgUrl} key.
+     */
+    @PostMapping("/recognizeBomSimple")
+    public R<ImageRecognitionVo> recognizeBomSimple(@RequestBody Map<String, String> request) {
+        String imgUrl = request.get("imgUrl");
+        if (imgUrl == null || imgUrl.trim().isEmpty()) {
+            return R.fail("图片URL不能为空");
+        }
+
+        ImageRecognitionBo bo = new ImageRecognitionBo();
+        bo.setImageUrl(imgUrl);
+        bo.setRecognitionType("bom");
+
+        ImageRecognitionVo result = iImageRecognitionService.recognizeBom(bo);
+        return R.ok(result);
+    }
+}
\ No newline at end of file
diff --git a/klp-wms/src/main/java/com/klp/domain/bo/ImageRecognitionBo.java b/klp-wms/src/main/java/com/klp/domain/bo/ImageRecognitionBo.java
new file mode 100644
index 00000000..e038b909
--- /dev/null
+++ b/klp-wms/src/main/java/com/klp/domain/bo/ImageRecognitionBo.java
@@ -0,0 +1,63 @@
+package com.klp.domain.bo;
+
+import com.klp.common.core.validate.AddGroup;
+import com.klp.common.core.validate.EditGroup;
+import com.klp.common.core.validate.QueryGroup;
+import com.klp.common.core.domain.BaseEntity;
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+
+import javax.validation.constraints.NotBlank;
+import javax.validation.constraints.NotNull;
+
+/**
+ * Request object for an image recognition call.
+ *
+ * @author klp
+ * @date 2025-01-27
+ */
+@Data
+@EqualsAndHashCode(callSuper = true)
+public class ImageRecognitionBo extends BaseEntity {
+
+    /**
+     * URL of the image to recognize.
+     */
+    @NotBlank(message = "图片URL不能为空", groups = { AddGroup.class, EditGroup.class })
+    private String imageUrl;
+
+    /**
+     * Recognition type: {@code bom} (BOM content), {@code text} (plain text) or {@code general}.
+     */
+    private String recognitionType = "bom";
+
+    /**
+     * Whether multi-round voting is enabled.
+     */
+    private Boolean enableVoting = true;
+
+    /**
+     * Number of voting rounds.
+     */
+    private Integer votingRounds = 3;
+
+    /**
+     * Custom prompt text appended to the built-in prompt.
+     */
+    private String customPrompt;
+
+    /**
+     * Whether the recognition result should be saved to the database.
+     */
+    private Boolean saveToDatabase = false;
+
+    /**
+     * Product id (used to associate BOM information).
+     */
+    private Long productId;
+
+    /**
+     * Description of the recognition task.
+     */
+    private String taskDescription;
+}
\ No newline at end of file
diff --git a/klp-wms/src/main/java/com/klp/domain/vo/ImageRecognitionVo.java b/klp-wms/src/main/java/com/klp/domain/vo/ImageRecognitionVo.java
new file mode 100644
index 00000000..36429503
--- /dev/null
+++ b/klp-wms/src/main/java/com/klp/domain/vo/ImageRecognitionVo.java
@@ -0,0 +1,113 @@
+package com.klp.domain.vo;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import lombok.Data;
+
+import java.io.Serializable;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * View object carrying the outcome of an image recognition call.
+ *
+ * @author klp
+ * @date 2025-01-27
+ */
+@Data
+public class ImageRecognitionVo implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Recognition id.
+     */
+    private Long recognitionId;
+
+    /**
+     * Image URL.
+     */
+    private String imageUrl;
+
+    /**
+     * Recognition type.
+     */
+    private String recognitionType;
+
+    /**
+     * Raw recognition result text.
+     */
+    private String recognizedText;
+
+    /**
+     * Structured recognition result (parsed JSON).
+     */
+    private Map<String, Object> structuredResult;
+
+    /**
+     * Extracted BOM items.
+     */
+    private List<BomItemVo> bomItems;
+
+    /**
+     * Recognition confidence.
+     */
+    private Double confidence;
+
+    /**
+     * Recognition status: {@code success}, {@code failed} or {@code processing}.
+     */
+    private String status;
+
+    /**
+     * Error message.
+     */
+    private String errorMessage;
+
+    /**
+     * Processing time in milliseconds.
+     */
+    private Long processingTime;
+
+    /**
+     * Creation time.
+     */
+    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
+    private Date createTime;
+
+    /**
+     * A single BOM line item.
+     */
+    @Data
+    public static class BomItemVo {
+        /**
+         * Raw material id.
+         */
+        private String rawMaterialId;
+
+        /**
+         * Raw material name.
+         */
+        private String rawMaterialName;
+
+        /**
+         * Quantity.
+         */
+        private Double quantity;
+
+        /**
+         * Unit.
+         */
+        private String unit;
+
+        /**
+         * Specification.
+         */
+        private String specification;
+
+        /**
+         * Remark.
+         */
+        private String remark;
+    }
+}
\ No newline at end of file
diff --git a/klp-wms/src/main/java/com/klp/service/IImageRecognitionService.java b/klp-wms/src/main/java/com/klp/service/IImageRecognitionService.java
new file mode 100644
index 00000000..57a9c108
--- /dev/null
+++ b/klp-wms/src/main/java/com/klp/service/IImageRecognitionService.java
@@ -0,0 +1,77 @@
+package com.klp.service;
+
+import com.klp.domain.bo.ImageRecognitionBo;
+import com.klp.domain.vo.ImageRecognitionVo;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Image recognition service contract.
+ *
+ * @author klp
+ * @date 2025-01-27
+ */
+public interface IImageRecognitionService {
+
+    /**
+     * Recognizes the content of one image.
+     *
+     * @param bo recognition request
+     * @return recognition result
+     */
+    ImageRecognitionVo recognizeImage(ImageRecognitionBo bo);
+
+    /**
+     * Recognizes a batch of images.
+     *
+     * @param boList recognition requests
+     * @return recognition results
+     */
+    List<ImageRecognitionVo> recognizeImages(List<ImageRecognitionBo> boList);
+
+    /**
+     * Recognizes BOM content.
+     *
+     * @param bo recognition request
+     * @return BOM recognition result
+     */
+    ImageRecognitionVo recognizeBom(ImageRecognitionBo bo);
+
+    /**
+     * Recognizes text content.
+     *
+     * @param bo recognition request
+     * @return text recognition result
+     */
+    ImageRecognitionVo recognizeText(ImageRecognitionBo bo);
+
+    /**
+     * Tests connectivity to the AI endpoint.
+     *
+     * @return connection test result
+     */
+    Map<String, Object> testAiConnection();
+
+    /**
+     * Returns the recognition configuration.
+     *
+     * @return configuration values
+     */
+    Map<String, Object> getRecognitionConfig();
+
+    /**
+     * Updates the recognition configuration.
+     *
+     * @param config configuration values
+     */
+    void updateRecognitionConfig(Map<String, Object> config);
+
+    /**
+     * Returns the recognition history.
+     *
+     * @param pageQuery paging query parameters
+     * @return recognition history page
+     */
+    Map<String, Object> getRecognitionHistory(Map<String, Object> pageQuery);
+}
\ No newline at end of file
diff --git a/klp-wms/src/main/java/com/klp/service/impl/ImageRecognitionServiceImpl.java b/klp-wms/src/main/java/com/klp/service/impl/ImageRecognitionServiceImpl.java
new file mode 100644
index 00000000..60fd988a
--- /dev/null
+++ b/klp-wms/src/main/java/com/klp/service/impl/ImageRecognitionServiceImpl.java
@@ -0,0 +1,490 @@
+package com.klp.service.impl;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.klp.config.ImageRecognitionConfig;
+import com.klp.domain.bo.ImageRecognitionBo;
+import com.klp.domain.vo.ImageRecognitionVo;
+import com.klp.service.IImageRecognitionService;
+import com.klp.utils.ImageProcessingUtils;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.http.*;
+import org.springframework.stereotype.Service;
+import org.springframework.web.client.RestTemplate;
+
+import java.util.*;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Implementation of the image recognition service.
+ *
+ * @author klp
+ * @date
2025-01-27
+ */
+@Slf4j
+@RequiredArgsConstructor
+@Service
+public class ImageRecognitionServiceImpl implements IImageRecognitionService,
+        org.springframework.beans.factory.DisposableBean {
+
+    /** Matches from the first '{' to the last '}' so JSON wrapped in prose or code fences can be recovered. */
+    private static final Pattern JSON_OBJECT_PATTERN = Pattern.compile("\\{[\\s\\S]*\\}");
+
+    private final ImageRecognitionConfig config;
+    private final ImageProcessingUtils imageProcessingUtils;
+
+    // NOTE(review): Lombok copies @Qualifier onto the generated constructor parameter only when
+    // lombok.config lists it under lombok.copyableAnnotations - confirm the project does so.
+    @Qualifier("salesScriptRestTemplate")
+    private final RestTemplate restTemplate;
+
+    private final ObjectMapper objectMapper = new ObjectMapper();
+    /** Runs the per-image tasks of a batch request. */
+    private final ExecutorService executorService = Executors.newFixedThreadPool(5);
+    /**
+     * Runs the voting rounds. Kept separate from {@link #executorService}: batch tasks block on
+     * their voting rounds, so sharing one bounded pool deadlocks once every thread holds a batch task.
+     */
+    private final ExecutorService votingExecutor = Executors.newFixedThreadPool(5);
+
+    /**
+     * Stops both thread pools when the bean is destroyed so their worker threads do not outlive the context.
+     */
+    @Override
+    public void destroy() {
+        executorService.shutdown();
+        votingExecutor.shutdown();
+    }
+
+    /**
+     * Validates the image URL and dispatches on the recognition type ({@code bom}, {@code text},
+     * anything else - including {@code null} - is treated as general recognition).
+     * Never throws: failures are reported through the result's status and error message.
+     *
+     * @param bo recognition request
+     * @return result with status {@code success} or {@code failed} and the elapsed time in milliseconds
+     */
+    @Override
+    public ImageRecognitionVo recognizeImage(ImageRecognitionBo bo) {
+        long startTime = System.currentTimeMillis();
+        ImageRecognitionVo result = new ImageRecognitionVo();
+
+        try {
+            // Validate the image URL
+            if (!imageProcessingUtils.isValidImageUrl(bo.getImageUrl())) {
+                throw new RuntimeException("无效的图片URL");
+            }
+
+            // equals() on the literal is null-safe; a switch on a null String would throw.
+            String type = bo.getRecognitionType();
+            if ("bom".equals(type)) {
+                result = recognizeBom(bo);
+            } else if ("text".equals(type)) {
+                result = recognizeText(bo);
+            } else {
+                result = recognizeGeneral(bo);
+            }
+
+            result.setStatus("success");
+        } catch (Exception e) {
+            log.error("图片识别失败", e);
+            result.setStatus("failed");
+            result.setErrorMessage(e.getMessage());
+        }
+
+        result.setProcessingTime(System.currentTimeMillis() - startTime);
+        return result;
+    }
+
+    /**
+     * Recognizes all images concurrently and returns the results in request order.
+     *
+     * @param boList recognition requests
+     * @return one result per request
+     */
+    @Override
+    public List<ImageRecognitionVo> recognizeImages(List<ImageRecognitionBo> boList) {
+        List<CompletableFuture<ImageRecognitionVo>> futures = new ArrayList<>();
+
+        for (ImageRecognitionBo bo : boList) {
+            futures.add(CompletableFuture.supplyAsync(() -> recognizeImage(bo), executorService));
+        }
+
+        List<ImageRecognitionVo> results = new ArrayList<>();
+        for (CompletableFuture<ImageRecognitionVo> future : futures) {
+            try {
+                // join() reports failures as unchecked exceptions, so no interrupt status is swallowed here.
+                results.add(future.join());
+            } catch (Exception e) {
+                log.error("批量识别失败", e);
+                ImageRecognitionVo errorResult = new ImageRecognitionVo();
+                errorResult.setStatus("failed");
+                errorResult.setErrorMessage(e.getMessage());
+                results.add(errorResult);
+            }
+        }
+
+        return results;
+    }
+
+    /**
+     * Sends the BOM prompt for the image and tries to parse the reply into structured data.
+     * When parsing fails only the raw reply text is populated.
+     *
+     * @param bo recognition request
+     * @return result carrying the raw text and, when parseable, the structured result and BOM items
+     */
+    @Override
+    public ImageRecognitionVo recognizeBom(ImageRecognitionBo bo) {
+        String prompt = buildBomPrompt(bo);
+        String aiResponse = callAiApi(bo.getImageUrl(), prompt, bo.getEnableVoting(), bo.getVotingRounds());
+
+        ImageRecognitionVo result = new ImageRecognitionVo();
+        result.setImageUrl(bo.getImageUrl());
+        result.setRecognitionType("bom");
+        result.setRecognizedText(aiResponse);
+
+        // Parse the BOM information
+        try {
+            Map<String, Object> structuredResult = parseBomResponse(aiResponse);
+            result.setStructuredResult(structuredResult);
+            result.setBomItems(extractBomItems(structuredResult));
+        } catch (Exception e) {
+            log.warn("解析BOM响应失败: {}", e.getMessage());
+        }
+
+        return result;
+    }
+
+    /**
+     * Sends the text-recognition prompt for the image.
+     *
+     * @param bo recognition request
+     * @return result carrying the raw reply text
+     */
+    @Override
+    public ImageRecognitionVo recognizeText(ImageRecognitionBo bo) {
+        String prompt = buildTextPrompt(bo);
+        String aiResponse = callAiApi(bo.getImageUrl(), prompt, bo.getEnableVoting(), bo.getVotingRounds());
+
+        ImageRecognitionVo result = new ImageRecognitionVo();
+        result.setImageUrl(bo.getImageUrl());
+        result.setRecognitionType("text");
+        result.setRecognizedText(aiResponse);
+
+        return result;
+    }
+
+    /**
+     * Posts a minimal chat request to the configured endpoint.
+     *
+     * @return map with {@code success}, {@code message} and, on success, the raw {@code response} body
+     */
+    @Override
+    public Map<String, Object> testAiConnection() {
+        Map<String, Object> result = new HashMap<>();
+        try {
+            // Build the probe request
+            Map<String, Object> requestBody = new HashMap<>();
+            requestBody.put("model", config.getModelName());
+
+            Map<String, Object> message = new HashMap<>();
+            message.put("role", "user");
+            message.put("content", "你好");
+            requestBody.put("messages", Arrays.asList(message));
+            requestBody.put("max_tokens", 10);
+
+            HttpHeaders headers = new HttpHeaders();
+            headers.setContentType(MediaType.APPLICATION_JSON);
+            headers.setBearerAuth(config.getApiKey());
+
+            HttpEntity<Map<String, Object>> entity = new HttpEntity<>(requestBody, headers);
+            ResponseEntity<Map> response = restTemplate.postForEntity(
+                config.getApiUrl(), entity, Map.class);
+
+            result.put("success", true);
+            result.put("message", "AI连接测试成功");
+            result.put("response", response.getBody());
+        } catch (Exception e) {
+            log.error("AI连接测试失败", e);
+            result.put("success", false);
+            result.put("message", "AI连接测试失败: " + e.getMessage());
+        }
+        return result;
+    }
+
+    /**
+     * Returns the current settings with the API key masked.
+     *
+     * @return configuration values keyed by property name
+     */
+    @Override
+    public Map<String, Object> getRecognitionConfig() {
+        Map<String, Object> configMap = new HashMap<>();
+        configMap.put("apiUrl", config.getApiUrl());
+        configMap.put("modelName", config.getModelName());
+        configMap.put("apiKey", maskApiKey(config.getApiKey()));
+        configMap.put("maxRetries", config.getMaxRetries());
+        configMap.put("temperature", config.getTemperature());
+        configMap.put("maxTokens", config.getMaxTokens());
+        configMap.put("maxImageDimension", config.getMaxImageDimension());
+        configMap.put("imageQuality", config.getImageQuality());
+        return configMap;
+    }
+
+    /**
+     * Placeholder: only logs which keys were submitted; nothing is applied yet.
+     *
+     * @param config submitted configuration values
+     */
+    @Override
+    public void updateRecognitionConfig(Map<String, Object> config) {
+        // Log the key names only: the values may contain the API key.
+        log.info("更新识别配置: {}", config == null ? null : config.keySet());
+        // Configuration update logic can be implemented here
+    }
+
+    /**
+     * Placeholder: always returns an empty page.
+     *
+     * @param pageQuery paging query parameters (currently unused)
+     * @return map with an empty {@code rows} list and {@code total} of 0
+     */
+    @Override
+    public Map<String, Object> getRecognitionHistory(Map<String, Object> pageQuery) {
+        Map<String, Object> result = new HashMap<>();
+        result.put("rows", new ArrayList<>());
+        result.put("total", 0L);
+        return result;
+    }
+
+    /**
+     * Masks the API key for display: the first 10 characters followed by "...", or "***" when the
+     * key is missing or too short to show a prefix without revealing all of it.
+     */
+    private static String maskApiKey(String apiKey) {
+        if (apiKey == null || apiKey.length() <= 10) {
+            return "***";
+        }
+        return apiKey.substring(0, 10) + "...";
+    }
+
+    /**
+     * General-purpose recognition: sends the general prompt for the image.
+     */
+    private ImageRecognitionVo recognizeGeneral(ImageRecognitionBo bo) {
+        String prompt = buildGeneralPrompt(bo);
+        String aiResponse = callAiApi(bo.getImageUrl(), prompt, bo.getEnableVoting(), bo.getVotingRounds());
+
+        ImageRecognitionVo result = new ImageRecognitionVo();
+        result.setImageUrl(bo.getImageUrl());
+        result.setRecognitionType("general");
+        result.setRecognizedText(aiResponse);
+
+        return result;
+    }
+
+    /**
+     * Builds the chat-completions request (image as a data URI plus the prompt) and sends it,
+     * either once or through several voting rounds.
+     *
+     * @param imageUrl     image to download and embed
+     * @param prompt       prompt text
+     * @param enableVoting whether voting is requested; {@code null} means no
+     * @param votingRounds number of rounds; voting is used only when this is greater than 1
+     * @return the model's reply text
+     */
+    private String callAiApi(String imageUrl, String prompt, Boolean enableVoting, Integer votingRounds) {
+        // Convert the image to a data URI
+        String dataUri = imageProcessingUtils.imageUrlToDataUri(
+            imageUrl, config.getMaxImageDimension(), config.getImageQuality());
+
+        // Build the request body
+        Map<String, Object> requestBody = new HashMap<>();
+        requestBody.put("model", config.getModelName());
+
+        List<Map<String, Object>> contents = new ArrayList<>();
+
+        // Image part
+        Map<String, Object> imageContent = new HashMap<>();
+        imageContent.put("type", "image_url");
+        Map<String, Object> imageUrlObj = new HashMap<>();
+        imageUrlObj.put("url", dataUri);
+        imageUrlObj.put("detail", "low");
+        imageContent.put("image_url", imageUrlObj);
+        contents.add(imageContent);
+
+        // Text part
+        Map<String, Object> textContent = new HashMap<>();
+        textContent.put("type", "text");
+        textContent.put("text", prompt);
+        contents.add(textContent);
+
+        Map<String, Object> message = new HashMap<>();
+        message.put("role", "user");
+        message.put("content", contents);
+
+        requestBody.put("messages", Arrays.asList(message));
+        requestBody.put("enable_thinking", true);
+        requestBody.put("temperature", config.getTemperature());
+        requestBody.put("top_p", 0.7);
+        requestBody.put("min_p", 0.05);
+        requestBody.put("frequency_penalty", 0.2);
+        // The chat-completions field is "max_tokens" (as testAiConnection already sends);
+        // the previous "max_token" key meant the configured limit was never applied.
+        requestBody.put("max_tokens", config.getMaxTokens());
+        requestBody.put("stream", false);
+        requestBody.put("stop", new ArrayList<>());
+        Map<String, Object> responseFormat = new HashMap<>();
+        responseFormat.put("type", "text");
+        requestBody.put("response_format", responseFormat);
+
+        // Multi-round voting; the null check avoids unboxing a missing votingRounds.
+        if (Boolean.TRUE.equals(enableVoting) && votingRounds != null && votingRounds > 1) {
+            return callAiApiWithVoting(requestBody, votingRounds);
+        }
+        return callAiApiSingle(requestBody);
+    }
+
+    /**
+     * Posts the request, retrying up to the configured maximum on exceptions and on replies
+     * that carry no choices.
+     *
+     * @param requestBody chat-completions request body
+     * @return content of the first choice's message
+     * @throws RuntimeException when every attempt fails
+     */
+    @SuppressWarnings("unchecked") // the reply is untyped JSON; the casts follow the choices[0].message.content shape
+    private String callAiApiSingle(Map<String, Object> requestBody) {
+        int maxRetries = config.getMaxRetries();
+        for (int i = 0; i < maxRetries; i++) {
+            try {
+                HttpHeaders headers = new HttpHeaders();
+                headers.setContentType(MediaType.APPLICATION_JSON);
+                headers.setBearerAuth(config.getApiKey());
+
+                HttpEntity<Map<String, Object>> entity = new HttpEntity<>(requestBody, headers);
+                ResponseEntity<Map> response = restTemplate.postForEntity(
+                    config.getApiUrl(), entity, Map.class);
+
+                if (response.getStatusCode() == HttpStatus.OK && response.getBody() != null) {
+                    Map<String, Object> body = response.getBody();
+                    List<Map<String, Object>> choices = (List<Map<String, Object>>) body.get("choices");
+                    if (choices != null && !choices.isEmpty()) {
+                        Map<String, Object> choice = choices.get(0);
+                        Map<String, Object> message = (Map<String, Object>) choice.get("message");
+                        return (String) message.get("content");
+                    }
+                }
+            } catch (Exception e) {
+                log.error("AI API调用失败,重试 {}: {}", i + 1, e.getMessage());
+                if (i == maxRetries - 1) {
+                    throw new RuntimeException("AI API调用失败", e);
+                }
+            }
+        }
+        throw new RuntimeException("AI API调用失败,已达到最大重试次数");
+    }
+
+    /**
+     * Issues the same request {@code rounds} times in parallel and returns the first successful reply.
+     *
+     * @param requestBody chat-completions request body
+     * @param rounds      number of parallel calls
+     * @return the first successful reply
+     * @throws RuntimeException when every round fails
+     */
+    private String callAiApiWithVoting(Map<String, Object> requestBody, int rounds) {
+        List<CompletableFuture<String>> futures = new ArrayList<>();
+
+        for (int i = 0; i < rounds; i++) {
+            futures.add(CompletableFuture.supplyAsync(() -> callAiApiSingle(requestBody), votingExecutor));
+        }
+
+        List<String> results = new ArrayList<>();
+        for (CompletableFuture<String> future : futures) {
+            try {
+                results.add(future.join());
+            } catch (Exception e) {
+                log.error("投票轮次失败: {}", e.getMessage());
+            }
+        }
+
+        if (results.isEmpty()) {
+            throw new RuntimeException("所有投票轮次都失败了");
+        }
+
+        // Simple vote: return the first successful result
+        return results.get(0);
+    }
+
+    /**
+     * Builds the BOM recognition prompt, including the product id and custom prompt when present.
+     */
+    private String buildBomPrompt(ImageRecognitionBo bo) {
+        StringBuilder prompt = new StringBuilder();
+        prompt.append("请仔细分析这张图片中的BOM(物料清单)信息,并提取以下内容:\n\n");
+        prompt.append("【识别要求】\n");
+        prompt.append("1. 识别图片中的所有物料信息,包括原材料ID、名称、数量、单位等\n");
+        prompt.append("2. 如果图片中包含表格,请按表格结构提取信息\n");
+        prompt.append("3. 如果图片中包含列表,请按列表格式提取信息\n");
+        prompt.append("4. 确保数量信息的准确性,包括数字和单位\n");
+        prompt.append("5. 识别规格、备注等附加信息\n\n");
+
+        if (bo.getProductId() != null) {
+            prompt.append("【产品信息】\n");
+            prompt.append("产品ID: ").append(bo.getProductId()).append("\n\n");
+        }
+
+        if (bo.getCustomPrompt() != null && !bo.getCustomPrompt().isEmpty()) {
+            prompt.append("【自定义要求】\n");
+            prompt.append(bo.getCustomPrompt()).append("\n\n");
+        }
+
+        prompt.append("【输出格式】\n");
+        prompt.append("请按以下JSON格式输出识别结果:\n");
+        prompt.append("{\n");
+        prompt.append(" \"bomItems\": [\n");
+        prompt.append(" {\n");
+        prompt.append(" \"rawMaterialId\": \"原材料ID\",\n");
+        prompt.append(" \"rawMaterialName\": \"原材料名称\",\n");
+        prompt.append(" \"quantity\": 数量,\n");
+        prompt.append(" \"unit\": \"单位\",\n");
+        prompt.append(" \"specification\": \"规格\",\n");
+        prompt.append(" \"remark\": \"备注\"\n");
+        prompt.append(" }\n");
+        prompt.append(" ],\n");
+        prompt.append(" \"summary\": \"BOM清单总结\",\n");
+        prompt.append(" \"totalItems\": 总项目数\n");
+        prompt.append("}\n\n");
+        prompt.append("如果图片中没有BOM信息,请返回空数组。");
+
+        return prompt.toString();
+    }
+
+    /**
+     * Builds the text recognition prompt, including the custom prompt when present.
+     */
+    private String buildTextPrompt(ImageRecognitionBo bo) {
+        StringBuilder prompt = new StringBuilder();
+        prompt.append("请识别图片中的所有文字内容,包括但不限于:\n\n");
+        prompt.append("【识别要求】\n");
+        prompt.append("1. 识别图片中的所有可见文字\n");
+        prompt.append("2. 保持文字的原始格式和顺序\n");
+        prompt.append("3. 识别表格、列表等结构化内容\n");
+        prompt.append("4. 识别数字、符号等特殊字符\n");
+        prompt.append("5. 保持段落和换行格式\n\n");
+
+        if (bo.getCustomPrompt() != null && !bo.getCustomPrompt().isEmpty()) {
+            prompt.append("【自定义要求】\n");
+            prompt.append(bo.getCustomPrompt()).append("\n\n");
+        }
+
+        prompt.append("【输出格式】\n");
+        prompt.append("请直接输出识别到的文字内容,保持原有格式。");
+
+        return prompt.toString();
+    }
+
+    /**
+     * Builds the general recognition prompt, including the custom prompt when present.
+     */
+    private String buildGeneralPrompt(ImageRecognitionBo bo) {
+        StringBuilder prompt = new StringBuilder();
+        prompt.append("请分析这张图片的内容,并提供详细描述:\n\n");
+        prompt.append("【分析要求】\n");
+        prompt.append("1. 描述图片的主要内容和主题\n");
+        prompt.append("2. 识别图片中的文字信息\n");
+        prompt.append("3. 分析图片的结构和布局\n");
+        prompt.append("4. 提取关键信息和数据\n");
+        prompt.append("5. 识别图片中的表格、图表等结构化内容\n\n");
+
+        if (bo.getCustomPrompt() != null && !bo.getCustomPrompt().isEmpty()) {
+            prompt.append("【自定义要求】\n");
+            prompt.append(bo.getCustomPrompt()).append("\n\n");
+        }
+
+        prompt.append("【输出格式】\n");
+        prompt.append("请提供详细的分析结果,包括文字内容、结构分析等。");
+
+        return prompt.toString();
+    }
+
+    /**
+     * Parses the reply as JSON; when that fails, retries on the outermost {...} span and finally
+     * falls back to a map holding the raw text under {@code rawText}.
+     */
+    @SuppressWarnings("unchecked") // Jackson yields a raw Map for a JSON object
+    private Map<String, Object> parseBomResponse(String response) {
+        try {
+            // Try to parse the whole reply as JSON
+            return objectMapper.readValue(response, Map.class);
+        } catch (JsonProcessingException e) {
+            // Direct parsing failed: try the embedded JSON object
+            Matcher matcher = JSON_OBJECT_PATTERN.matcher(response);
+            if (matcher.find()) {
+                try {
+                    return objectMapper.readValue(matcher.group(), Map.class);
+                } catch (JsonProcessingException ex) {
+                    log.warn("无法解析BOM响应为JSON: {}", response);
+                }
+            }
+            Map<String, Object> fallback = new HashMap<>();
+            fallback.put("rawText", response);
+            return fallback;
+        }
+    }
+
+    /**
+     * Converts the {@code bomItems} array of the parsed reply into view objects. Entries that are
+     * not JSON objects are skipped; scalar fields are converted to text so a numeric id or a
+     * quoted quantity no longer aborts the extraction of the remaining items.
+     */
+    private List<ImageRecognitionVo.BomItemVo> extractBomItems(Map<String, Object> structuredResult) {
+        List<ImageRecognitionVo.BomItemVo> bomItems = new ArrayList<>();
+
+        Object rawItems = structuredResult == null ? null : structuredResult.get("bomItems");
+        if (!(rawItems instanceof List)) {
+            return bomItems;
+        }
+
+        for (Object rawItem : (List<?>) rawItems) {
+            if (!(rawItem instanceof Map)) {
+                log.warn("提取BOM项目失败: {}", rawItem);
+                continue;
+            }
+            Map<?, ?> item = (Map<?, ?>) rawItem;
+
+            ImageRecognitionVo.BomItemVo bomItem = new ImageRecognitionVo.BomItemVo();
+            bomItem.setRawMaterialId(asText(item.get("rawMaterialId")));
+            bomItem.setRawMaterialName(asText(item.get("rawMaterialName")));
+            bomItem.setQuantity(asDouble(item.get("quantity")));
+            bomItem.setUnit(asText(item.get("unit")));
+            bomItem.setSpecification(asText(item.get("specification")));
+            bomItem.setRemark(asText(item.get("remark")));
+
+            bomItems.add(bomItem);
+        }
+
+        return bomItems;
+    }
+
+    /** Returns the value as text, or {@code null} when absent. */
+    private static String asText(Object value) {
+        return value == null ? null : String.valueOf(value);
+    }
+
+    /** Returns a number, or a numeric string, as a Double; anything else yields {@code null}. */
+    private static Double asDouble(Object value) {
+        if (value instanceof Number) {
+            return ((Number) value).doubleValue();
+        }
+        if (value instanceof String) {
+            try {
+                return Double.valueOf(((String) value).trim());
+            } catch (NumberFormatException ignored) {
+                // Not numeric (e.g. "about 3"): leave the quantity unset.
+                return null;
+            }
+        }
+        return null;
+    }
+}
\ No newline at end of file
diff --git
a/klp-wms/src/main/java/com/klp/test/ImageRecognitionTest.java b/klp-wms/src/main/java/com/klp/test/ImageRecognitionTest.java
new file mode 100644
index 00000000..b60ee46d
--- /dev/null
+++ b/klp-wms/src/main/java/com/klp/test/ImageRecognitionTest.java
@@ -0,0 +1,111 @@
+package com.klp.test;
+
+import com.klp.domain.bo.ImageRecognitionBo;
+import com.klp.domain.vo.ImageRecognitionVo;
+import com.klp.service.IImageRecognitionService;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.boot.CommandLineRunner;
+import org.springframework.context.annotation.Profile;
+import org.springframework.stereotype.Component;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Start-up smoke test for the image recognition feature.
+ * <p>
+ * It lives in the main source set and calls the remote AI API, so it is only registered when the
+ * {@code image-recognition-selftest} profile is active; otherwise every application start would
+ * issue billable requests.
+ *
+ * @author klp
+ * @date 2025-01-27
+ */
+@Slf4j
+@Component
+@Profile("image-recognition-selftest")
+@RequiredArgsConstructor
+public class ImageRecognitionTest implements CommandLineRunner {
+
+    private final IImageRecognitionService imageRecognitionService;
+
+    @Override
+    public void run(String... args) throws Exception {
+        log.info("开始测试图片识别功能...");
+
+        // Connectivity check
+        testAiConnection();
+
+        // Text recognition
+        testTextRecognition();
+
+        // BOM recognition
+        testBomRecognition();
+
+        log.info("图片识别功能测试完成");
+    }
+
+    /**
+     * Checks connectivity to the AI endpoint and logs the outcome.
+     */
+    private void testAiConnection() {
+        log.info("测试AI连接...");
+        try {
+            Map<String, Object> result = imageRecognitionService.testAiConnection();
+            if (Boolean.TRUE.equals(result.get("success"))) {
+                log.info("AI连接测试成功: {}", result.get("message"));
+            } else {
+                log.error("AI连接测试失败: {}", result.get("message"));
+            }
+        } catch (Exception e) {
+            log.error("AI连接测试异常", e);
+        }
+    }
+
+    /**
+     * Runs one text recognition against a placeholder image and logs the result.
+     */
+    private void testTextRecognition() {
+        log.info("测试文字识别...");
+        try {
+            ImageRecognitionBo bo = new ImageRecognitionBo();
+            bo.setImageUrl("https://via.placeholder.com/400x300/000000/FFFFFF?text=Test+Text");
+            bo.setRecognitionType("text");
+            bo.setEnableVoting(false);
+
+            // recognizeImage dispatches on the type and is the entry point that fills in the
+            // status and processing time logged below.
+            ImageRecognitionVo result = imageRecognitionService.recognizeImage(bo);
+            log.info("文字识别结果: {}", result.getRecognizedText());
+            log.info("识别状态: {}", result.getStatus());
+            log.info("处理时间: {}ms", result.getProcessingTime());
+        } catch (Exception e) {
+            log.error("文字识别测试异常", e);
+        }
+    }
+
+    /**
+     * Runs one BOM recognition against a placeholder image and logs the extracted items.
+     */
+    private void testBomRecognition() {
+        log.info("测试BOM识别...");
+        try {
+            ImageRecognitionBo bo = new ImageRecognitionBo();
+            bo.setImageUrl("https://via.placeholder.com/400x300/000000/FFFFFF?text=BOM+Test");
+            bo.setRecognitionType("bom");
+            bo.setEnableVoting(false);
+            bo.setProductId(1L);
+
+            ImageRecognitionVo result = imageRecognitionService.recognizeImage(bo);
+            log.info("BOM识别结果: {}", result.getRecognizedText());
+            log.info("识别状态: {}", result.getStatus());
+            log.info("处理时间: {}ms", result.getProcessingTime());
+
+            if (result.getBomItems() != null) {
+                log.info("BOM项目数量: {}", result.getBomItems().size());
+                for (ImageRecognitionVo.BomItemVo item : result.getBomItems()) {
+                    log.info("BOM项目: {} - {} {} {}",
+                        item.getRawMaterialName(),
+                        item.getQuantity(),
+                        item.getUnit(),
+                        item.getSpecification());
+                }
+            }
+        } catch (Exception e) {
+            log.error("BOM识别测试异常", e);
+        }
+    }
+}
\ No newline at end of file
diff --git a/klp-wms/src/main/java/com/klp/utils/ImageProcessingUtils.java b/klp-wms/src/main/java/com/klp/utils/ImageProcessingUtils.java
new file mode 100644
index 00000000..86d2c13f
--- /dev/null
+++ b/klp-wms/src/main/java/com/klp/utils/ImageProcessingUtils.java
@@ -0,0 +1,144 @@
+package com.klp.utils;
+
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+
+import javax.imageio.IIOImage;
+import javax.imageio.ImageIO;
+import javax.imageio.ImageWriteParam;
+import javax.imageio.ImageWriter;
+import javax.imageio.stream.MemoryCacheImageOutputStream;
+import java.awt.*;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.Base64;
+import java.util.Iterator;
+
+/**
+ * Image helpers: download, downscale and encode an image as a JPEG data URI.
+ *
+ * @author klp
+ * @date 2025-01-27
+ */
+@Slf4j
+@Component
+public class ImageProcessingUtils {
+
+    /** Connect timeout for image downloads, in milliseconds. */
+    private static final int CONNECT_TIMEOUT_MS = 30_000;
+    /** Read timeout for image downloads, in milliseconds. */
+    private static final int READ_TIMEOUT_MS = 60_000;
+
+    /**
+     * Downloads the image, downscales it and returns it as a Base64 JPEG data URI.
+     *
+     * @param imageUrl     image URL
+     * @param maxDimension maximum length of the longer side, in pixels
+     * @param quality      JPEG quality, 0-100
+     * @return {@code data:image/jpeg;base64,...} URI
+     * @throws RuntimeException when the image cannot be downloaded, decoded or encoded
+     */
+    public String imageUrlToDataUri(String imageUrl, int maxDimension, int quality) {
+        try {
+            // Download
+            BufferedImage originalImage = downloadImage(imageUrl);
+            if (originalImage == null) {
+                throw new RuntimeException("无法下载图片: " + imageUrl);
+            }
+
+            // Downscale
+            BufferedImage resizedImage = resizeImage(originalImage, maxDimension);
+
+            // Encode
+            String base64 = imageToBase64(resizedImage, quality);
+
+            return "data:image/jpeg;base64," + base64;
+        } catch (Exception e) {
+            log.error("图片转换失败: {}", e.getMessage(), e);
+            // Keep the cause so the original stack trace is not lost.
+            throw new RuntimeException("图片转换失败: " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Downloads and decodes the image.
+     *
+     * @param imageUrl image URL
+     * @return the decoded image, or {@code null} when the download fails or the format is not readable
+     */
+    private BufferedImage downloadImage(String imageUrl) {
+        try {
+            URLConnection connection = new URL(imageUrl).openConnection();
+            // Without timeouts a stalled remote host would block the calling thread indefinitely.
+            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
+            connection.setReadTimeout(READ_TIMEOUT_MS);
+            try (InputStream inputStream = connection.getInputStream()) {
+                return ImageIO.read(inputStream);
+            }
+        } catch (IOException e) {
+            log.error("下载图片失败: {}", e.getMessage(), e);
+            return null;
+        }
+    }
+
+    /**
+     * Scales the image so its longer side is at most {@code maxDimension}, keeping the aspect ratio.
+     * Images already within the limit keep their size; the result is always an RGB image.
+     *
+     * @param originalImage source image
+     * @param maxDimension  maximum length of the longer side, in pixels
+     * @return the scaled image
+     */
+    private BufferedImage resizeImage(BufferedImage originalImage, int maxDimension) {
+        int originalWidth = originalImage.getWidth();
+        int originalHeight = originalImage.getHeight();
+
+        // Compute the target size; clamp to 1 pixel because a very elongated image (or a
+        // non-positive limit) would otherwise yield 0, which BufferedImage rejects.
+        int newWidth, newHeight;
+        if (originalWidth > originalHeight) {
+            newWidth = Math.max(1, Math.min(originalWidth, maxDimension));
+            newHeight = Math.max(1, (int) ((double) originalHeight * newWidth / originalWidth));
+        } else {
+            newHeight = Math.max(1, Math.min(originalHeight, maxDimension));
+            newWidth = Math.max(1, (int) ((double) originalWidth * newHeight / originalHeight));
+        }
+
+        // Create the target image
+        BufferedImage resizedImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
+        Graphics2D g2d = resizedImage.createGraphics();
+
+        // Rendering quality
+        g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR);
+        g2d.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
+        g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
+
+        // Draw the scaled image
+        g2d.drawImage(originalImage, 0, 0, newWidth, newHeight, null);
+        g2d.dispose();
+
+        return resizedImage;
+    }
+
+    /**
+     * Encodes the image as JPEG at the requested quality and returns it Base64-encoded.
+     *
+     * @param image   image to encode
+     * @param quality JPEG quality, 0-100 (values outside the range are clamped)
+     * @return Base64 string
+     * @throws RuntimeException when encoding fails
+     */
+    private String imageToBase64(BufferedImage image, int quality) {
+        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("jpeg");
+        if (!writers.hasNext()) {
+            throw new RuntimeException("图片转Base64失败: " + "no JPEG writer available");
+        }
+        ImageWriter writer = writers.next();
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        try (MemoryCacheImageOutputStream output = new MemoryCacheImageOutputStream(baos)) {
+            // ImageIO.write(image, "JPEG", ...) always uses the default quality; an explicit
+            // write param is needed for the configured quality to take effect.
+            ImageWriteParam param = writer.getDefaultWriteParam();
+            param.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
+            param.setCompressionQuality(Math.max(0, Math.min(100, quality)) / 100f);
+            writer.setOutput(output);
+            writer.write(null, new IIOImage(image, null, null), param);
+        } catch (IOException e) {
+            log.error("图片转Base64失败: {}", e.getMessage(), e);
+            throw new RuntimeException("图片转Base64失败: " + e.getMessage(), e);
+        } finally {
+            writer.dispose();
+        }
+        // Read the bytes only after the image stream is closed, which flushes its cache into baos.
+        return Base64.getEncoder().encodeToString(baos.toByteArray());
+    }
+
+    /**
+     * Checks that the string is a well-formed {@code http} or {@code https} URL.
+     * <p>
+     * NOTE(review): only the scheme is checked. The URL comes from the request and is fetched by
+     * this server, so internal hosts are reachable through it (SSRF) - consider an allow-list.
+     *
+     * @param imageUrl image URL
+     * @return {@code true} when the URL parses and uses http or https
+     */
+    public boolean isValidImageUrl(String imageUrl) {
+        try {
+            URL url = new URL(imageUrl);
+            String protocol = url.getProtocol();
+            return "http".equals(protocol) || "https".equals(protocol);
+        } catch (Exception e) {
+            log.warn("无效的图片URL: {}", imageUrl);
+            return false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/klp-wms/src/main/resources/application-image-recognition.yml b/klp-wms/src/main/resources/application-image-recognition.yml
new file mode 100644
index 00000000..1c89731a
--- /dev/null
+++ b/klp-wms/src/main/resources/application-image-recognition.yml
@@ -0,0 +1,38 @@
+# Image recognition settings
+image:
+  recognition:
+    # AI API settings
+    api-url: https://api.siliconflow.cn/v1/chat/completions
+    model-name: Qwen/Qwen2.5-VL-32B-Instruct
+    # Never commit the key: it is read from the IMAGE_RECOGNITION_API_KEY environment variable.
+    api-key: ${IMAGE_RECOGNITION_API_KEY:}
+    max-retries: 3
+    temperature: 0.0
+    max-tokens: 4096
+
+    # Image pre-processing settings
+    max-image-dimension: 512
+    image-quality: 60
+
+    # NOTE(review): the keys below have no matching field in ImageRecognitionConfig as shown in
+    # this patch, so they appear to have no effect yet - confirm before relying on them.
+
+    # Recognition defaults
+    default-recognition-type: bom
+    enable-voting: true
+    voting-rounds: 3
+
+    # Supported image formats
+    supported-formats:
+      - jpg
+      - jpeg
+      - png
+      - bmp
+      - gif
+
+    # Timeouts (milliseconds)
+    connect-timeout: 30000
+    read-timeout: 60000
+
+    # Cache settings
+    enable-cache: true
+    cache-ttl: 3600
+
+    # Logging settings
+    enable-debug-log: false