添加图片识别千问大模型

This commit is contained in:
2025-08-02 14:46:02 +08:00
parent 840be2a338
commit 43546d56a9
9 changed files with 1272 additions and 0 deletions

View File

@@ -0,0 +1,96 @@
package com.klp.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
/**
 * Configuration properties for the image recognition feature, bound from the
 * {@code image.recognition.*} namespace.
 *
 * <p>Every field except {@link #getApiKey() apiKey} carries a default that can be
 * overridden from the application configuration.
 *
 * @author klp
 * @date 2025-01-27
 */
@Configuration
@ConfigurationProperties(prefix = "image.recognition")
public class ImageRecognitionConfig {

    /** Chat-completions endpoint that recognition requests are posted to. */
    private String apiUrl = "https://api.siliconflow.cn/v1/chat/completions";

    /** Model identifier sent in the {@code model} field of each request. */
    private String modelName = "Qwen/Qwen2.5-VL-72B-Instruct";

    /**
     * Bearer token for the AI API.
     *
     * <p>Deliberately has no built-in value: credentials must not live in source control.
     * Supply it through {@code image.recognition.api-key} in an external configuration
     * source. Until it is configured this is the empty string, never {@code null}.
     *
     * <p>NOTE(review): the key that used to be hard-coded here remains in the repository
     * history and should be revoked at the provider.
     */
    private String apiKey = "";

    /** Maximum number of attempts for a single AI API call. */
    private Integer maxRetries = 3;

    /** Sampling temperature sent with each request. */
    private Double temperature = 0.0;

    /** Upper bound on generated tokens sent with each request. */
    private Integer maxTokens = 4096;

    /** Maximum image dimension, in pixels, handed to the image conversion step. */
    private Integer maxImageDimension = 512;

    /** Image quality value handed to the image conversion step. */
    private Integer imageQuality = 60;

    public String getApiUrl() {
        return apiUrl;
    }

    public void setApiUrl(String apiUrl) {
        this.apiUrl = apiUrl;
    }

    public String getModelName() {
        return modelName;
    }

    public void setModelName(String modelName) {
        this.modelName = modelName;
    }

    public String getApiKey() {
        return apiKey;
    }

    public void setApiKey(String apiKey) {
        this.apiKey = apiKey;
    }

    public Integer getMaxRetries() {
        return maxRetries;
    }

    public void setMaxRetries(Integer maxRetries) {
        this.maxRetries = maxRetries;
    }

    public Double getTemperature() {
        return temperature;
    }

    public void setTemperature(Double temperature) {
        this.temperature = temperature;
    }

    public Integer getMaxTokens() {
        return maxTokens;
    }

    public void setMaxTokens(Integer maxTokens) {
        this.maxTokens = maxTokens;
    }

    public Integer getMaxImageDimension() {
        return maxImageDimension;
    }

    public void setMaxImageDimension(Integer maxImageDimension) {
        this.maxImageDimension = maxImageDimension;
    }

    public Integer getImageQuality() {
        return imageQuality;
    }

    public void setImageQuality(Integer imageQuality) {
        this.imageQuality = imageQuality;
    }
}

View File

@@ -0,0 +1,140 @@
package com.klp.controller;
import com.klp.common.annotation.Log;
import com.klp.common.core.controller.BaseController;
import com.klp.common.core.domain.R;
import com.klp.common.core.validate.AddGroup;
import com.klp.common.core.validate.EditGroup;
import com.klp.common.core.validate.QueryGroup;
import com.klp.common.enums.BusinessType;
import com.klp.common.utils.poi.ExcelUtil;
import com.klp.domain.bo.ImageRecognitionBo;
import com.klp.domain.vo.ImageRecognitionVo;
import com.klp.service.IImageRecognitionService;
import lombok.RequiredArgsConstructor;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;
import javax.servlet.http.HttpServletResponse;
import javax.validation.constraints.NotEmpty;
import javax.validation.constraints.NotNull;
import java.util.List;
import java.util.Map;
/**
 * REST endpoints for image recognition, served under {@code /wms/imageRecognition}.
 *
 * <p>Every handler delegates directly to {@link IImageRecognitionService}.
 *
 * @author klp
 * @date 2025-01-27
 */
@Validated
@RequiredArgsConstructor
@RestController
@RequestMapping("/wms/imageRecognition")
public class WmsImageRecognitionController extends BaseController {

    private final IImageRecognitionService iImageRecognitionService;

    /**
     * Recognizes an image using the recognition type carried by the request.
     */
    @PostMapping("/recognize")
    public R<ImageRecognitionVo> recognize(@RequestBody @Validated(AddGroup.class) ImageRecognitionBo bo) {
        ImageRecognitionVo recognized = iImageRecognitionService.recognizeImage(bo);
        return R.ok(recognized);
    }

    /**
     * Recognizes BOM content; the request's recognition type is forced to {@code "bom"}.
     */
    @PostMapping("/recognizeBom")
    public R<ImageRecognitionVo> recognizeBom(@RequestBody @Validated(AddGroup.class) ImageRecognitionBo bo) {
        bo.setRecognitionType("bom");
        ImageRecognitionVo recognized = iImageRecognitionService.recognizeBom(bo);
        return R.ok(recognized);
    }

    /**
     * Recognizes text content; the request's recognition type is forced to {@code "text"}.
     */
    @PostMapping("/recognizeText")
    public R<ImageRecognitionVo> recognizeText(@RequestBody @Validated(AddGroup.class) ImageRecognitionBo bo) {
        bo.setRecognitionType("text");
        ImageRecognitionVo recognized = iImageRecognitionService.recognizeText(bo);
        return R.ok(recognized);
    }

    /**
     * Recognizes a batch of images.
     */
    @PostMapping("/recognizeBatch")
    public R<List<ImageRecognitionVo>> recognizeBatch(@RequestBody @Validated(AddGroup.class) List<ImageRecognitionBo> boList) {
        List<ImageRecognitionVo> recognized = iImageRecognitionService.recognizeImages(boList);
        return R.ok(recognized);
    }

    /**
     * Tests the connection to the AI service.
     */
    @PostMapping("/testConnection")
    public R<Map<String, Object>> testConnection() {
        Map<String, Object> outcome = iImageRecognitionService.testAiConnection();
        return R.ok(outcome);
    }

    /**
     * Returns the current recognition configuration.
     */
    @GetMapping("/config")
    public R<Map<String, Object>> getConfig() {
        Map<String, Object> current = iImageRecognitionService.getRecognitionConfig();
        return R.ok(current);
    }

    /**
     * Passes a configuration update to the service.
     */
    @PostMapping("/config")
    public R<Void> updateConfig(@RequestBody Map<String, Object> config) {
        iImageRecognitionService.updateRecognitionConfig(config);
        return R.ok();
    }

    /**
     * Returns the recognition history for the given paging query.
     */
    @PostMapping("/history")
    public R<Map<String, Object>> getHistory(@RequestBody Map<String, Object> pageQuery) {
        Map<String, Object> history = iImageRecognitionService.getRecognitionHistory(pageQuery);
        return R.ok(history);
    }

    /**
     * Simplified text recognition endpoint (legacy request format): the body is a plain
     * map whose {@code imgUrl} entry holds the image URL.
     */
    @PostMapping("/recognizeTextSimple")
    public R<ImageRecognitionVo> recognizeTextSimple(@RequestBody Map<String, String> request) {
        String imgUrl = request.get("imgUrl");
        if (isMissing(imgUrl)) {
            return R.fail("图片URL不能为空");
        }
        return R.ok(iImageRecognitionService.recognizeText(simpleRequest(imgUrl, "text")));
    }

    /**
     * Simplified BOM recognition endpoint (legacy request format): the body is a plain
     * map whose {@code imgUrl} entry holds the image URL.
     */
    @PostMapping("/recognizeBomSimple")
    public R<ImageRecognitionVo> recognizeBomSimple(@RequestBody Map<String, String> request) {
        String imgUrl = request.get("imgUrl");
        if (isMissing(imgUrl)) {
            return R.fail("图片URL不能为空");
        }
        return R.ok(iImageRecognitionService.recognizeBom(simpleRequest(imgUrl, "bom")));
    }

    /** Returns {@code true} when the value is null or contains only whitespace. */
    private static boolean isMissing(String value) {
        return value == null || value.trim().isEmpty();
    }

    /** Builds a request object carrying only the image URL and the recognition type. */
    private static ImageRecognitionBo simpleRequest(String imgUrl, String recognitionType) {
        ImageRecognitionBo bo = new ImageRecognitionBo();
        bo.setImageUrl(imgUrl);
        bo.setRecognitionType(recognitionType);
        return bo;
    }
}

View File

@@ -0,0 +1,63 @@
package com.klp.domain.bo;
import com.klp.common.core.validate.AddGroup;
import com.klp.common.core.validate.EditGroup;
import com.klp.common.core.validate.QueryGroup;
import com.klp.common.core.domain.BaseEntity;
import lombok.Data;
import lombok.EqualsAndHashCode;
import javax.validation.constraints.NotBlank;
import javax.validation.constraints.NotNull;
/**
* Business object carrying the parameters of an image recognition request.
*
* @author klp
* @date 2025-01-27
*/
@Data
@EqualsAndHashCode(callSuper = true)
public class ImageRecognitionBo extends BaseEntity {
/**
* URL of the image to recognize. Must not be blank when validated with the
* AddGroup or EditGroup validation group.
*/
@NotBlank(message = "图片URL不能为空", groups = { AddGroup.class, EditGroup.class })
private String imageUrl;
/**
* Recognition type: "bom" - recognize BOM content, "text" - recognize text,
* "general" - general recognition. Defaults to "bom".
*/
private String recognitionType = "bom";
/**
* Whether multi-round voting is enabled. Defaults to true.
*/
private Boolean enableVoting = true;
/**
* Number of voting rounds. Defaults to 3.
*/
private Integer votingRounds = 3;
/**
* Custom prompt text supplied by the caller; optional.
*/
private String customPrompt;
/**
* Whether the recognition result should be saved to the database. Defaults to false.
* NOTE(review): no code in the visible service reads this flag - confirm it is honored elsewhere.
*/
private Boolean saveToDatabase = false;
/**
* Product ID, used to associate BOM information; optional.
*/
private Long productId;
/**
* Description of the recognition task; optional.
*/
private String taskDescription;
}

View File

@@ -0,0 +1,113 @@
package com.klp.domain.vo;
import com.fasterxml.jackson.annotation.JsonFormat;
import lombok.Data;
import java.io.Serializable;
import java.util.Date;
import java.util.List;
import java.util.Map;
/**
 * View object holding the result of an image recognition request.
 *
 * <p>The class is {@link Serializable}; the nested {@link BomItemVo} is serializable as
 * well so that an instance with a populated {@code bomItems} list can be written with
 * Java serialization without a {@code NotSerializableException}.
 *
 * @author klp
 * @date 2025-01-27
 */
@Data
public class ImageRecognitionVo implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Recognition ID. */
    private Long recognitionId;

    /** URL of the recognized image. */
    private String imageUrl;

    /** Recognition type. */
    private String recognitionType;

    /** Recognition result as text. */
    private String recognizedText;

    /** Structured recognition result (parsed JSON). */
    private Map<String, Object> structuredResult;

    /** List of BOM items. */
    private List<BomItemVo> bomItems;

    /** Recognition confidence. */
    private Double confidence;

    /** Recognition status: "success", "failed" or "processing". */
    private String status;

    /** Error message. */
    private String errorMessage;

    /** Processing time in milliseconds. */
    private Long processingTime;

    /** Creation time, rendered as {@code yyyy-MM-dd HH:mm:ss} in JSON. */
    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
    private Date createTime;

    /**
     * A single BOM line item.
     */
    @Data
    public static class BomItemVo implements Serializable {

        private static final long serialVersionUID = 1L;

        /** Raw material ID. */
        private String rawMaterialId;

        /** Raw material name. */
        private String rawMaterialName;

        /** Quantity. */
        private Double quantity;

        /** Unit. */
        private String unit;

        /** Specification. */
        private String specification;

        /** Remark. */
        private String remark;
    }
}

View File

@@ -0,0 +1,77 @@
package com.klp.service;
import com.klp.domain.bo.ImageRecognitionBo;
import com.klp.domain.vo.ImageRecognitionVo;
import java.util.List;
import java.util.Map;
/**
* Service interface for image recognition.
*
* @author klp
* @date 2025-01-27
*/
public interface IImageRecognitionService {
/**
* Recognizes the content of an image.
*
* @param bo recognition request parameters
* @return recognition result
*/
ImageRecognitionVo recognizeImage(ImageRecognitionBo bo);
/**
* Recognizes a batch of images.
*
* @param boList list of recognition request parameters
* @return list of recognition results
*/
List<ImageRecognitionVo> recognizeImages(List<ImageRecognitionBo> boList);
/**
* Recognizes BOM content in an image.
*
* @param bo recognition request parameters
* @return BOM recognition result
*/
ImageRecognitionVo recognizeBom(ImageRecognitionBo bo);
/**
* Recognizes text content in an image.
*
* @param bo recognition request parameters
* @return text recognition result
*/
ImageRecognitionVo recognizeText(ImageRecognitionBo bo);
/**
* Tests the connection to the AI service.
*
* @return connection test result
*/
Map<String, Object> testAiConnection();
/**
* Returns the recognition configuration.
*
* @return configuration information
*/
Map<String, Object> getRecognitionConfig();
/**
* Updates the recognition configuration.
*
* @param config configuration information
*/
void updateRecognitionConfig(Map<String, Object> config);
/**
* Returns the recognition history.
*
* @param pageQuery paging query parameters
* @return recognition history list
*/
Map<String, Object> getRecognitionHistory(Map<String, Object> pageQuery);
}

View File

@@ -0,0 +1,490 @@
package com.klp.service.impl;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.klp.config.ImageRecognitionConfig;
import com.klp.domain.bo.ImageRecognitionBo;
import com.klp.domain.vo.ImageRecognitionVo;
import com.klp.service.IImageRecognitionService;
import com.klp.utils.ImageProcessingUtils;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.http.*;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Image recognition service that sends an image plus a text prompt to a chat-completions
 * style AI API and maps the answer onto {@link ImageRecognitionVo}.
 *
 * <p>Threading: batch recognition and voting rounds run on two separate fixed pools. A
 * batch task blocks while waiting for its voting rounds, so running both kinds of work on
 * one bounded pool can leave every worker waiting for rounds that are still queued.
 *
 * @author klp
 * @date 2025-01-27
 */
@Slf4j
@RequiredArgsConstructor
@Service
public class ImageRecognitionServiceImpl implements IImageRecognitionService {

    /** Greedy match from the first '{' to the last '}' of a model answer. */
    private static final Pattern JSON_OBJECT_PATTERN = Pattern.compile("\\{[\\s\\S]*\\}");

    /** Number of leading API key characters shown by {@link #getRecognitionConfig()}. */
    private static final int API_KEY_VISIBLE_PREFIX = 10;

    /** Upper bound on caller-requested voting rounds; each round is one AI API call. */
    private static final int MAX_VOTING_ROUNDS = 10;

    private final ImageRecognitionConfig config;
    private final ImageProcessingUtils imageProcessingUtils;

    // NOTE(review): Lombok copies @Qualifier onto the generated constructor parameter only
    // when lombok.config lists it under lombok.copyableAnnotations; otherwise the template
    // is injected by type alone. Confirm against the project's lombok.config.
    @Qualifier("salesScriptRestTemplate")
    private final RestTemplate restTemplate;

    private final ObjectMapper objectMapper = new ObjectMapper();

    /** Runs the per-image tasks of {@link #recognizeImages(List)}. */
    private final ExecutorService executorService = Executors.newFixedThreadPool(5);

    /** Runs voting rounds; kept apart from {@link #executorService} to avoid starvation. */
    private final ExecutorService votingExecutor = Executors.newFixedThreadPool(5);

    /**
     * Validates the image URL, dispatches on the recognition type ("bom", "text", anything
     * else including {@code null} means general) and records status and processing time.
     *
     * <p>Failures are reported in the returned object (status "failed" plus error message)
     * rather than thrown.
     *
     * @param bo recognition request
     * @return recognition result, never {@code null}
     */
    @Override
    public ImageRecognitionVo recognizeImage(ImageRecognitionBo bo) {
        long startTime = System.currentTimeMillis();
        ImageRecognitionVo result = new ImageRecognitionVo();
        try {
            if (bo == null) {
                throw new IllegalArgumentException("识别请求不能为空");
            }
            if (!imageProcessingUtils.isValidImageUrl(bo.getImageUrl())) {
                throw new RuntimeException("无效的图片URL");
            }
            // Constant-first equals keeps a null recognition type on the general path
            // instead of failing inside a switch on a null string.
            String type = bo.getRecognitionType();
            if ("bom".equals(type)) {
                result = recognizeBom(bo);
            } else if ("text".equals(type)) {
                result = recognizeText(bo);
            } else {
                result = recognizeGeneral(bo);
            }
            result.setStatus("success");
        } catch (Exception e) {
            log.error("图片识别失败", e);
            result.setStatus("failed");
            result.setErrorMessage(e.getMessage());
            if (bo != null) {
                // Lets callers of the batch API match a failure to its request.
                result.setImageUrl(bo.getImageUrl());
            }
        }
        result.setProcessingTime(System.currentTimeMillis() - startTime);
        return result;
    }

    /**
     * Recognizes several images concurrently; results are returned in request order.
     *
     * @param boList recognition requests; {@code null} or empty yields an empty list
     * @return one result per request
     */
    @Override
    public List<ImageRecognitionVo> recognizeImages(List<ImageRecognitionBo> boList) {
        List<ImageRecognitionVo> results = new ArrayList<>();
        if (boList == null || boList.isEmpty()) {
            return results;
        }
        List<CompletableFuture<ImageRecognitionVo>> futures = new ArrayList<>(boList.size());
        for (ImageRecognitionBo bo : boList) {
            futures.add(CompletableFuture.supplyAsync(() -> recognizeImage(bo), executorService));
        }
        for (CompletableFuture<ImageRecognitionVo> future : futures) {
            try {
                results.add(future.get());
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // Preserve the interrupt for the caller's thread.
                    Thread.currentThread().interrupt();
                }
                log.error("批量识别失败", e);
                ImageRecognitionVo errorResult = new ImageRecognitionVo();
                errorResult.setStatus("failed");
                errorResult.setErrorMessage(e.getMessage());
                results.add(errorResult);
            }
        }
        return results;
    }

    /**
     * Recognizes BOM content and, when the answer can be parsed, fills the structured
     * result and the BOM item list. The raw answer is always kept in {@code recognizedText}.
     *
     * @param bo recognition request
     * @return BOM recognition result
     */
    @Override
    public ImageRecognitionVo recognizeBom(ImageRecognitionBo bo) {
        String prompt = buildBomPrompt(bo);
        String aiResponse = callAiApi(bo.getImageUrl(), prompt, bo.getEnableVoting(), bo.getVotingRounds());
        ImageRecognitionVo result = new ImageRecognitionVo();
        result.setImageUrl(bo.getImageUrl());
        result.setRecognitionType("bom");
        result.setRecognizedText(aiResponse);
        try {
            Map<String, Object> structuredResult = parseBomResponse(aiResponse);
            result.setStructuredResult(structuredResult);
            result.setBomItems(extractBomItems(structuredResult));
        } catch (Exception e) {
            log.warn("解析BOM响应失败: {}", e.getMessage());
        }
        return result;
    }

    /**
     * Recognizes the text content of an image.
     *
     * @param bo recognition request
     * @return text recognition result
     */
    @Override
    public ImageRecognitionVo recognizeText(ImageRecognitionBo bo) {
        String prompt = buildTextPrompt(bo);
        String aiResponse = callAiApi(bo.getImageUrl(), prompt, bo.getEnableVoting(), bo.getVotingRounds());
        ImageRecognitionVo result = new ImageRecognitionVo();
        result.setImageUrl(bo.getImageUrl());
        result.setRecognitionType("text");
        result.setRecognizedText(aiResponse);
        return result;
    }

    /**
     * Sends a minimal text-only request to the configured endpoint.
     *
     * @return map with {@code success}, {@code message} and, on success, {@code response}
     */
    @Override
    public Map<String, Object> testAiConnection() {
        Map<String, Object> result = new HashMap<>();
        try {
            Map<String, Object> requestBody = new HashMap<>();
            requestBody.put("model", config.getModelName());
            Map<String, String> message = new HashMap<>();
            message.put("role", "user");
            message.put("content", "你好");
            requestBody.put("messages", Collections.singletonList(message));
            requestBody.put("max_tokens", 10);
            HttpEntity<Map<String, Object>> entity = new HttpEntity<>(requestBody, buildHeaders());
            ResponseEntity<Map> response = restTemplate.postForEntity(
                    config.getApiUrl(), entity, Map.class);
            result.put("success", true);
            result.put("message", "AI连接测试成功");
            result.put("response", response.getBody());
        } catch (Exception e) {
            log.error("AI连接测试失败", e);
            result.put("success", false);
            result.put("message", "AI连接测试失败: " + e.getMessage());
        }
        return result;
    }

    /**
     * Returns the current configuration with the API key masked.
     *
     * @return configuration values keyed by property name
     */
    @Override
    public Map<String, Object> getRecognitionConfig() {
        Map<String, Object> configMap = new HashMap<>();
        configMap.put("apiUrl", config.getApiUrl());
        configMap.put("modelName", config.getModelName());
        configMap.put("apiKey", maskApiKey(config.getApiKey()));
        configMap.put("maxRetries", config.getMaxRetries());
        configMap.put("temperature", config.getTemperature());
        configMap.put("maxTokens", config.getMaxTokens());
        configMap.put("maxImageDimension", config.getMaxImageDimension());
        configMap.put("imageQuality", config.getImageQuality());
        return configMap;
    }

    /**
     * Placeholder for configuration updates; currently only logs which keys were sent.
     *
     * @param config requested configuration changes
     */
    @Override
    public void updateRecognitionConfig(Map<String, Object> config) {
        // Log key names only: the payload may carry the API key, which must not reach logs.
        log.info("更新识别配置: {}", config == null ? Collections.emptySet() : config.keySet());
        // Configuration update logic can be implemented here.
    }

    /**
     * Returns the recognition history. Currently always an empty page.
     *
     * @param pageQuery paging parameters (unused)
     * @return map with an empty {@code rows} list and {@code total} of 0
     */
    @Override
    public Map<String, Object> getRecognitionHistory(Map<String, Object> pageQuery) {
        Map<String, Object> result = new HashMap<>();
        result.put("rows", new ArrayList<>());
        result.put("total", 0L);
        return result;
    }

    /** General-purpose recognition used for every type other than "bom" and "text". */
    private ImageRecognitionVo recognizeGeneral(ImageRecognitionBo bo) {
        String prompt = buildGeneralPrompt(bo);
        String aiResponse = callAiApi(bo.getImageUrl(), prompt, bo.getEnableVoting(), bo.getVotingRounds());
        ImageRecognitionVo result = new ImageRecognitionVo();
        result.setImageUrl(bo.getImageUrl());
        result.setRecognitionType("general");
        result.setRecognizedText(aiResponse);
        return result;
    }

    /** Masks an API key, keeping at most a short prefix and never more than half of it. */
    private static String maskApiKey(String apiKey) {
        if (apiKey == null || apiKey.isEmpty()) {
            return "";
        }
        int visible = Math.min(API_KEY_VISIBLE_PREFIX, apiKey.length() / 2);
        return apiKey.substring(0, visible) + "...";
    }

    /** Builds the JSON + bearer-token headers shared by all AI API calls. */
    private HttpHeaders buildHeaders() {
        HttpHeaders headers = new HttpHeaders();
        headers.setContentType(MediaType.APPLICATION_JSON);
        headers.setBearerAuth(config.getApiKey());
        return headers;
    }

    /**
     * Builds the multimodal request (image as data URI plus prompt) and sends it, either
     * once or as several voting rounds.
     */
    private String callAiApi(String imageUrl, String prompt, Boolean enableVoting, Integer votingRounds) {
        String dataUri = imageProcessingUtils.imageUrlToDataUri(
                imageUrl, config.getMaxImageDimension(), config.getImageQuality());

        Map<String, Object> imageUrlObj = new HashMap<>();
        imageUrlObj.put("url", dataUri);
        imageUrlObj.put("detail", "low");
        Map<String, Object> imageContent = new HashMap<>();
        imageContent.put("type", "image_url");
        imageContent.put("image_url", imageUrlObj);

        Map<String, Object> textContent = new HashMap<>();
        textContent.put("type", "text");
        textContent.put("text", prompt);

        List<Map<String, Object>> contents = new ArrayList<>();
        contents.add(imageContent);
        contents.add(textContent);

        Map<String, Object> message = new HashMap<>();
        message.put("role", "user");
        message.put("content", contents);

        Map<String, String> responseFormat = new HashMap<>();
        responseFormat.put("type", "text");

        Map<String, Object> requestBody = new HashMap<>();
        requestBody.put("model", config.getModelName());
        requestBody.put("messages", Collections.singletonList(message));
        requestBody.put("enable_thinking", true);
        requestBody.put("temperature", config.getTemperature());
        requestBody.put("top_p", 0.7);
        requestBody.put("min_p", 0.05);
        requestBody.put("frequency_penalty", 0.2);
        // The key is "max_tokens", as in testAiConnection; the former "max_token" did not
        // match it, so the configured limit was sent under a different name.
        requestBody.put("max_tokens", config.getMaxTokens());
        requestBody.put("stream", false);
        requestBody.put("stop", new ArrayList<>());
        requestBody.put("response_format", responseFormat);

        // A null round count means "no voting" instead of failing on unboxing.
        int rounds = votingRounds == null ? 1 : Math.min(votingRounds, MAX_VOTING_ROUNDS);
        if (Boolean.TRUE.equals(enableVoting) && rounds > 1) {
            return callAiApiWithVoting(requestBody, rounds);
        }
        return callAiApiSingle(requestBody);
    }

    /**
     * Calls the AI API, retrying up to the configured number of attempts.
     *
     * @throws RuntimeException when the last attempt throws, or when no attempt produced
     *         usable content
     */
    private String callAiApiSingle(Map<String, Object> requestBody) {
        Integer configuredRetries = config.getMaxRetries();
        // An unset or non-positive retry count still allows one attempt.
        int maxAttempts = configuredRetries == null ? 1 : Math.max(1, configuredRetries);
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
            try {
                HttpEntity<Map<String, Object>> entity = new HttpEntity<>(requestBody, buildHeaders());
                ResponseEntity<Map> response = restTemplate.postForEntity(
                        config.getApiUrl(), entity, Map.class);
                if (response.getStatusCode() == HttpStatus.OK && response.getBody() != null) {
                    String content = extractContent(response.getBody());
                    if (content != null) {
                        return content;
                    }
                }
                log.warn("AI API返回内容为空, 重试 {}", attempt);
            } catch (Exception e) {
                log.error("AI API调用失败重试 {}: {}", attempt, e.getMessage());
                if (attempt == maxAttempts) {
                    throw new RuntimeException("AI API调用失败", e);
                }
            }
        }
        throw new RuntimeException("AI API调用失败已达到最大重试次数");
    }

    /**
     * Reads {@code choices[0].message.content} from a response body.
     *
     * @return the content string, or {@code null} when any part of the path is missing or
     *         has an unexpected type
     */
    private static String extractContent(Map<?, ?> body) {
        Object choices = body.get("choices");
        if (!(choices instanceof List) || ((List<?>) choices).isEmpty()) {
            return null;
        }
        Object firstChoice = ((List<?>) choices).get(0);
        if (!(firstChoice instanceof Map)) {
            return null;
        }
        Object message = ((Map<?, ?>) firstChoice).get("message");
        if (!(message instanceof Map)) {
            return null;
        }
        Object content = ((Map<?, ?>) message).get("content");
        return content instanceof String ? (String) content : null;
    }

    /**
     * Runs several independent calls in parallel and returns the most frequent answer;
     * ties (including "all answers differ") go to the earliest successful round.
     *
     * @throws RuntimeException when every round failed
     */
    private String callAiApiWithVoting(Map<String, Object> requestBody, int rounds) {
        List<CompletableFuture<String>> futures = new ArrayList<>(rounds);
        for (int i = 0; i < rounds; i++) {
            futures.add(CompletableFuture.supplyAsync(() -> callAiApiSingle(requestBody), votingExecutor));
        }
        List<String> results = new ArrayList<>();
        for (CompletableFuture<String> future : futures) {
            try {
                results.add(future.get());
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    Thread.currentThread().interrupt();
                }
                log.error("投票轮次失败: {}", e.getMessage());
            }
        }
        if (results.isEmpty()) {
            throw new RuntimeException("所有投票轮次都失败了");
        }
        // LinkedHashMap keeps first-seen order so ties resolve to the earliest answer.
        Map<String, Integer> tally = new LinkedHashMap<>();
        for (String answer : results) {
            tally.merge(answer, 1, Integer::sum);
        }
        String winner = results.get(0);
        int winnerVotes = 0;
        for (Map.Entry<String, Integer> entry : tally.entrySet()) {
            if (entry.getValue() > winnerVotes) {
                winner = entry.getKey();
                winnerVotes = entry.getValue();
            }
        }
        return winner;
    }

    /** Appends the caller's custom prompt section when one was supplied. */
    private static void appendCustomPrompt(StringBuilder prompt, ImageRecognitionBo bo) {
        if (bo.getCustomPrompt() != null && !bo.getCustomPrompt().isEmpty()) {
            prompt.append("【自定义要求】\n");
            prompt.append(bo.getCustomPrompt()).append("\n\n");
        }
    }

    /** Builds the prompt for BOM recognition, including the expected JSON output shape. */
    private String buildBomPrompt(ImageRecognitionBo bo) {
        StringBuilder prompt = new StringBuilder();
        prompt.append("请仔细分析这张图片中的BOM物料清单信息并提取以下内容\n\n");
        prompt.append("【识别要求】\n");
        prompt.append("1. 识别图片中的所有物料信息包括原材料ID、名称、数量、单位等\n");
        prompt.append("2. 如果图片中包含表格，请按表格结构提取信息\n");
        prompt.append("3. 如果图片中包含列表，请按列表格式提取信息\n");
        prompt.append("4. 确保数量信息的准确性，包括数字和单位\n");
        prompt.append("5. 识别规格、备注等附加信息\n\n");
        if (bo.getProductId() != null) {
            prompt.append("【产品信息】\n");
            prompt.append("产品ID: ").append(bo.getProductId()).append("\n\n");
        }
        appendCustomPrompt(prompt, bo);
        prompt.append("【输出格式】\n");
        prompt.append("请按以下JSON格式输出识别结果\n");
        prompt.append("{\n");
        prompt.append(" \"bomItems\": [\n");
        prompt.append(" {\n");
        prompt.append(" \"rawMaterialId\": \"原材料ID\",\n");
        prompt.append(" \"rawMaterialName\": \"原材料名称\",\n");
        prompt.append(" \"quantity\": 数量,\n");
        prompt.append(" \"unit\": \"单位\",\n");
        prompt.append(" \"specification\": \"规格\",\n");
        prompt.append(" \"remark\": \"备注\"\n");
        prompt.append(" }\n");
        prompt.append(" ],\n");
        prompt.append(" \"summary\": \"BOM清单总结\",\n");
        prompt.append(" \"totalItems\": 总项目数\n");
        prompt.append("}\n\n");
        prompt.append("如果图片中没有BOM信息请返回空数组。");
        return prompt.toString();
    }

    /** Builds the prompt for plain text recognition. */
    private String buildTextPrompt(ImageRecognitionBo bo) {
        StringBuilder prompt = new StringBuilder();
        prompt.append("请识别图片中的所有文字内容，包括但不限于：\n\n");
        prompt.append("【识别要求】\n");
        prompt.append("1. 识别图片中的所有可见文字\n");
        prompt.append("2. 保持文字的原始格式和顺序\n");
        prompt.append("3. 识别表格、列表等结构化内容\n");
        prompt.append("4. 识别数字、符号等特殊字符\n");
        prompt.append("5. 保持段落和换行格式\n\n");
        appendCustomPrompt(prompt, bo);
        prompt.append("【输出格式】\n");
        prompt.append("请直接输出识别到的文字内容，保持原有格式。");
        return prompt.toString();
    }

    /** Builds the prompt for general image analysis. */
    private String buildGeneralPrompt(ImageRecognitionBo bo) {
        StringBuilder prompt = new StringBuilder();
        prompt.append("请分析这张图片的内容，并提供详细描述：\n\n");
        prompt.append("【分析要求】\n");
        prompt.append("1. 描述图片的主要内容和主题\n");
        prompt.append("2. 识别图片中的文字信息\n");
        prompt.append("3. 分析图片的结构和布局\n");
        prompt.append("4. 提取关键信息和数据\n");
        prompt.append("5. 识别图片中的表格、图表等结构化内容\n\n");
        appendCustomPrompt(prompt, bo);
        prompt.append("【输出格式】\n");
        prompt.append("请提供详细的分析结果，包括文字内容、结构分析等。");
        return prompt.toString();
    }

    /**
     * Parses a model answer into a map: first as-is, then by extracting the outermost
     * {@code {...}} span (answers are often wrapped in prose or code fences).
     *
     * @return the parsed object, or a map holding the raw answer under {@code rawText}
     *         when nothing could be parsed
     */
    private Map<String, Object> parseBomResponse(String response) {
        if (response == null) {
            return rawTextFallback(null);
        }
        try {
            return readJsonObject(response);
        } catch (JsonProcessingException e) {
            Matcher matcher = JSON_OBJECT_PATTERN.matcher(response);
            if (matcher.find()) {
                try {
                    return readJsonObject(matcher.group());
                } catch (JsonProcessingException ex) {
                    log.warn("无法解析BOM响应为JSON: {}", response);
                }
            }
            return rawTextFallback(response);
        }
    }

    /** Deserializes a JSON object into a generic map. */
    @SuppressWarnings("unchecked") // Jackson yields String keys when reading a JSON object into Map.class
    private Map<String, Object> readJsonObject(String json) throws JsonProcessingException {
        return objectMapper.readValue(json, Map.class);
    }

    /** Wraps an unparseable answer so callers still receive a non-null structure. */
    private static Map<String, Object> rawTextFallback(String response) {
        Map<String, Object> fallback = new HashMap<>();
        fallback.put("rawText", response);
        return fallback;
    }

    /**
     * Converts the {@code bomItems} entries of a parsed answer into view objects.
     *
     * <p>Values are converted leniently (numeric IDs become strings, numeric strings
     * become quantities) so a single unexpectedly-typed field does not discard the
     * remaining items.
     */
    private List<ImageRecognitionVo.BomItemVo> extractBomItems(Map<String, Object> structuredResult) {
        List<ImageRecognitionVo.BomItemVo> bomItems = new ArrayList<>();
        Object rawItems = structuredResult == null ? null : structuredResult.get("bomItems");
        if (!(rawItems instanceof List)) {
            return bomItems;
        }
        for (Object rawItem : (List<?>) rawItems) {
            if (!(rawItem instanceof Map)) {
                log.warn("提取BOM项目失败: {}", rawItem);
                continue;
            }
            Map<?, ?> item = (Map<?, ?>) rawItem;
            ImageRecognitionVo.BomItemVo bomItem = new ImageRecognitionVo.BomItemVo();
            bomItem.setRawMaterialId(asText(item.get("rawMaterialId")));
            bomItem.setRawMaterialName(asText(item.get("rawMaterialName")));
            bomItem.setQuantity(asDouble(item.get("quantity")));
            bomItem.setUnit(asText(item.get("unit")));
            bomItem.setSpecification(asText(item.get("specification")));
            bomItem.setRemark(asText(item.get("remark")));
            bomItems.add(bomItem);
        }
        return bomItems;
    }

    /** Returns the string form of a JSON value, or {@code null} for a missing value. */
    private static String asText(Object value) {
        return value == null ? null : String.valueOf(value);
    }

    /** Returns a JSON number, or a numeric string, as a Double; {@code null} otherwise. */
    private static Double asDouble(Object value) {
        if (value instanceof Number) {
            return ((Number) value).doubleValue();
        }
        if (value instanceof String) {
            try {
                return Double.valueOf(((String) value).trim());
            } catch (NumberFormatException ignored) {
                // Not a plain number (e.g. "about 3"): leave the quantity unset.
                return null;
            }
        }
        return null;
    }
}

View File

@@ -0,0 +1,111 @@
package com.klp.test;
import com.klp.domain.bo.ImageRecognitionBo;
import com.klp.domain.vo.ImageRecognitionVo;
import com.klp.service.IImageRecognitionService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;
import java.util.HashMap;
import java.util.Map;
/**
 * Opt-in startup smoke test for the image recognition feature.
 *
 * <p>The checks call the external AI API, so they no longer run on every application
 * start. Enable them with the JVM flag {@code -Dimage.recognition.selfTest=true} or the
 * program argument {@code --image-recognition-self-test}.
 *
 * @author klp
 * @date 2025-01-27
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class ImageRecognitionTest implements CommandLineRunner {

    /** JVM system property that enables the smoke test when set to "true". */
    private static final String SELF_TEST_PROPERTY = "image.recognition.selfTest";

    /** Program argument that enables the smoke test. */
    private static final String SELF_TEST_ARGUMENT = "--image-recognition-self-test";

    private final IImageRecognitionService imageRecognitionService;

    /**
     * Runs the connection, text and BOM checks when the smoke test is enabled.
     *
     * @param args application arguments
     */
    @Override
    public void run(String... args) throws Exception {
        if (!isSelfTestEnabled(args)) {
            log.debug("图片识别功能测试已跳过");
            return;
        }
        log.info("开始测试图片识别功能...");
        testAiConnection();
        testTextRecognition();
        testBomRecognition();
        log.info("图片识别功能测试完成");
    }

    /** Returns whether the smoke test was requested via system property or argument. */
    private static boolean isSelfTestEnabled(String... args) {
        if (Boolean.getBoolean(SELF_TEST_PROPERTY)) {
            return true;
        }
        if (args != null) {
            for (String arg : args) {
                if (SELF_TEST_ARGUMENT.equals(arg)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Checks the connection to the AI service.
     */
    private void testAiConnection() {
        log.info("测试AI连接...");
        try {
            Map<String, Object> result = imageRecognitionService.testAiConnection();
            if (Boolean.TRUE.equals(result.get("success"))) {
                log.info("AI连接测试成功: {}", result.get("message"));
            } else {
                log.error("AI连接测试失败: {}", result.get("message"));
            }
        } catch (Exception e) {
            log.error("AI连接测试异常", e);
        }
    }

    /**
     * Checks text recognition.
     */
    private void testTextRecognition() {
        log.info("测试文字识别...");
        try {
            ImageRecognitionBo bo = new ImageRecognitionBo();
            bo.setImageUrl("https://via.placeholder.com/400x300/000000/FFFFFF?text=Test+Text");
            bo.setRecognitionType("text");
            bo.setEnableVoting(false);
            // recognizeImage dispatches on the type and is the only entry point that fills
            // status and processing time, which are logged below.
            ImageRecognitionVo result = imageRecognitionService.recognizeImage(bo);
            log.info("文字识别结果: {}", result.getRecognizedText());
            log.info("识别状态: {}", result.getStatus());
            log.info("处理时间: {}ms", result.getProcessingTime());
        } catch (Exception e) {
            log.error("文字识别测试异常", e);
        }
    }

    /**
     * Checks BOM recognition.
     */
    private void testBomRecognition() {
        log.info("测试BOM识别...");
        try {
            ImageRecognitionBo bo = new ImageRecognitionBo();
            bo.setImageUrl("https://via.placeholder.com/400x300/000000/FFFFFF?text=BOM+Test");
            bo.setRecognitionType("bom");
            bo.setEnableVoting(false);
            bo.setProductId(1L);
            // See testTextRecognition: status and processing time come from recognizeImage.
            ImageRecognitionVo result = imageRecognitionService.recognizeImage(bo);
            log.info("BOM识别结果: {}", result.getRecognizedText());
            log.info("识别状态: {}", result.getStatus());
            log.info("处理时间: {}ms", result.getProcessingTime());
            if (result.getBomItems() != null) {
                log.info("BOM项目数量: {}", result.getBomItems().size());
                for (ImageRecognitionVo.BomItemVo item : result.getBomItems()) {
                    log.info("BOM项目: {} - {} {} {}",
                            item.getRawMaterialName(),
                            item.getQuantity(),
                            item.getUnit(),
                            item.getSpecification());
                }
            }
        } catch (Exception e) {
            log.error("BOM识别测试异常", e);
        }
    }
}

View File

@@ -0,0 +1,144 @@
package com.klp.utils;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.stream.ImageOutputStream;
import javax.imageio.stream.MemoryCacheImageOutputStream;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.Base64;
import java.util.Iterator;
/**
 * Image helper that downloads an image, scales it down and encodes it as a JPEG
 * Base64 data URI.
 *
 * @author klp
 * @date 2025-01-27
 */
@Slf4j
@Component
public class ImageProcessingUtils {

    /** Maximum time to establish the download connection, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10_000;

    /** Maximum time to wait for download data, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 30_000;

    /**
     * Downloads the image at the given URL and converts it to a JPEG Base64 data URI.
     *
     * <p>The URL is validated here as well, so callers that skip
     * {@link #isValidImageUrl(String)} cannot make the server open non-HTTP resources
     * such as {@code file:} URLs.
     * NOTE(review): HTTP(S) URLs pointing at internal hosts are still fetched; add a host
     * allow-list if the URL comes from untrusted users.
     *
     * @param imageUrl     image URL (http or https)
     * @param maxDimension maximum width/height of the encoded image, in pixels
     * @param quality      JPEG quality from 0 to 100; values outside the range are clamped
     * @return data URI of the form {@code data:image/jpeg;base64,...}
     * @throws RuntimeException when the URL is invalid or the image cannot be downloaded
     *                          or encoded; the original failure is attached as the cause
     */
    public String imageUrlToDataUri(String imageUrl, int maxDimension, int quality) {
        try {
            if (!isValidImageUrl(imageUrl)) {
                throw new IllegalArgumentException("无效的图片URL: " + imageUrl);
            }
            BufferedImage originalImage = downloadImage(imageUrl);
            if (originalImage == null) {
                throw new RuntimeException("无法下载图片: " + imageUrl);
            }
            BufferedImage resizedImage = resizeImage(originalImage, maxDimension);
            String base64 = imageToBase64(resizedImage, quality);
            return "data:image/jpeg;base64," + base64;
        } catch (Exception e) {
            log.error("图片转换失败: {}", e.getMessage(), e);
            throw new RuntimeException("图片转换失败: " + e.getMessage(), e);
        }
    }

    /**
     * Downloads and decodes an image.
     *
     * @param imageUrl image URL
     * @return the decoded image, or {@code null} when the download fails or the content
     *         is not a readable image
     */
    private BufferedImage downloadImage(String imageUrl) {
        try {
            URLConnection connection = new URL(imageUrl).openConnection();
            // Bound both phases so an unresponsive host cannot block the calling thread forever.
            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);
            try (InputStream inputStream = connection.getInputStream()) {
                return ImageIO.read(inputStream);
            }
        } catch (IOException e) {
            log.error("下载图片失败: {}", e.getMessage(), e);
            return null;
        }
    }

    /**
     * Scales an image so that its longer side does not exceed {@code maxDimension},
     * keeping the aspect ratio. Images already within the limit keep their size.
     *
     * @param originalImage source image
     * @param maxDimension  maximum width/height in pixels; values below 1 are treated as 1
     * @return a new RGB image on a white background
     */
    private BufferedImage resizeImage(BufferedImage originalImage, int maxDimension) {
        int limit = Math.max(1, maxDimension);
        int originalWidth = originalImage.getWidth();
        int originalHeight = originalImage.getHeight();
        int newWidth;
        int newHeight;
        if (originalWidth > originalHeight) {
            newWidth = Math.min(originalWidth, limit);
            newHeight = (int) ((double) originalHeight * newWidth / originalWidth);
        } else {
            newHeight = Math.min(originalHeight, limit);
            newWidth = (int) ((double) originalWidth * newHeight / originalHeight);
        }
        // Extreme aspect ratios can truncate the short side to 0, which BufferedImage rejects.
        newWidth = Math.max(1, newWidth);
        newHeight = Math.max(1, newHeight);

        BufferedImage resizedImage = new BufferedImage(newWidth, newHeight, BufferedImage.TYPE_INT_RGB);
        Graphics2D g2d = resizedImage.createGraphics();
        try {
            // JPEG has no alpha channel: paint white first so transparent areas do not
            // come out black.
            g2d.setColor(Color.WHITE);
            g2d.fillRect(0, 0, newWidth, newHeight);
            g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR);
            g2d.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
            g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            g2d.drawImage(originalImage, 0, 0, newWidth, newHeight, null);
        } finally {
            g2d.dispose();
        }
        return resizedImage;
    }

    /**
     * Encodes an image as JPEG with the requested quality and returns it as Base64.
     *
     * @param image   image to encode
     * @param quality JPEG quality from 0 to 100; values outside the range are clamped
     * @return Base64 string of the JPEG bytes
     * @throws RuntimeException when encoding fails
     */
    private String imageToBase64(BufferedImage image, int quality) {
        float compressionQuality = Math.max(0, Math.min(100, quality)) / 100f;
        ImageWriter writer = null;
        try {
            Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("jpeg");
            if (!writers.hasNext()) {
                throw new IOException("No JPEG ImageWriter available");
            }
            writer = writers.next();
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            // Closing the image stream flushes all encoded bytes into baos, so the byte
            // array is read only after the try-with-resources block.
            try (ImageOutputStream ios = new MemoryCacheImageOutputStream(baos)) {
                writer.setOutput(ios);
                ImageWriteParam param = writer.getDefaultWriteParam();
                param.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
                param.setCompressionQuality(compressionQuality);
                writer.write(null, new IIOImage(image, null, null), param);
            }
            return Base64.getEncoder().encodeToString(baos.toByteArray());
        } catch (IOException e) {
            log.error("图片转Base64失败: {}", e.getMessage(), e);
            throw new RuntimeException("图片转Base64失败: " + e.getMessage(), e);
        } finally {
            if (writer != null) {
                writer.dispose();
            }
        }
    }

    /**
     * Checks that a string is a well-formed http or https URL with a host.
     *
     * @param imageUrl image URL; may be {@code null}
     * @return {@code true} when the URL can be used for download
     */
    public boolean isValidImageUrl(String imageUrl) {
        try {
            URL url = new URL(imageUrl);
            String protocol = url.getProtocol();
            boolean httpLike = "http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol);
            String host = url.getHost();
            return httpLike && host != null && !host.isEmpty();
        } catch (Exception e) {
            log.warn("无效的图片URL: {}", imageUrl);
            return false;
        }
    }
}