ocr识别正则提取

This commit is contained in:
砂糖
2025-08-04 11:15:42 +08:00
parent 831695e236
commit b8a0792b5d
4 changed files with 218 additions and 57 deletions

View File

@@ -27,4 +27,18 @@ export function recognizeBomByModel({ imageUrl }) {
data: { imageUrl },
timeout: 100000
})
}
/**
 * Call the OCR endpoint to recognize the text contained in a PDF.
 *
 * @param {Object} params
 * @param {string} params.pdfUrl URL of the PDF; sent as `pdfUrl` in the POST body
 * @returns {Promise<*>} whatever `request` returns for this call.
 *   NOTE(review): the response shape is not visible in this module — confirm against the backend.
 */
export function recognizePdfText({ pdfUrl }) {
return request({
url: '/wms/purchasePlan/recognizePdfText',
method: 'post',
data: { pdfUrl },
})
}

View File

@@ -528,12 +528,13 @@ export default {
},
handleAccessory(row) {
this.accessoryOpen = true;
this.accessory = row.accessory;
this.form = row;
this.accessory = row.attachmentInfo;
},
handleAccessorySuccess(fileList) {
updatePurchasePlan({
planId: this.accessory.planId,
accessory: fileList.map(item => item.ossId).join(',')
planId: this.form.planId,
attachmentInfo: fileList.map(item => item.ossId).join(',')
}).then(response => {
this.$modal.msgSuccess("附件上传成功");
this.getList();

View File

@@ -12,9 +12,19 @@
</el-table>
</el-col>
<el-col :span="12">
<el-table :data="newResult" style="width: 100%">
<el-table-column prop="attrKey" label="属性名称" />
<el-table-column prop="attrValue" label="属性值" />
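<!-- Intended: all rows selected by default. NOTE(review): the visible changes initialise `selection` to [] and never pre-select rows — confirm -->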
<el-table :data="newResult" style="width: 100%" @selection-change="handleSelectionChange" :default-sort="{ prop: 'attrKey', order: 'ascending' }">
<el-table-column type="selection" width="55" />
<el-table-column prop="attrKey" label="属性名称">
<template slot-scope="scope">
<el-input v-model="scope.row.attrKey" />
</template>
</el-table-column>
<el-table-column prop="attrValue" label="属性值">
<template slot-scope="scope">
<el-input v-model="scope.row.attrValue" />
</template>
</el-table-column>
</el-table>
</el-col>
</el-row>
@@ -69,10 +79,14 @@ export default {
],
uploadQualityCertificateForm: {
qualityCertificateType: 1
}
},
selection: []
}
},
methods: {
// Handler bound to the table's selection-change event: stores the rows it
// receives in `this.selection`.
handleSelectionChange(selection) {
this.selection = selection;
},
async handleConfirm() {
if (!this.uploadQualityCertificateForm.qualityCertificateType) {
this.$modal.msgError('请选择处理方式');
@@ -97,12 +111,12 @@ export default {
rawMaterialId: this.info.rawMaterialId,
bomId: bom.data.bomId,
})
// 逐项创建BOMItem
for (let i = 0; i < this.newResult.length; i++) {
// 逐项创建BOMItem, 只创建选中的
for (let i = 0; i < this.selection.length; i++) {
await addBomItem({
bomId: bom.data.bomId,
attrKey: this.newResult[i].attrKey,
attrValue: this.newResult[i].attrValue,
attrKey: this.selection[i].attrKey,
attrValue: this.selection[i].attrValue,
})
}
this.$store.dispatch('category/getBomMap');
@@ -115,11 +129,11 @@ export default {
bomCode: 'N' + new Date().getTime(),
})
// 逐项创建BOMItem
for (let i = 0; i < this.newResult.length; i++) {
for (let i = 0; i < this.selection.length; i++) {
await addBomItem({
bomId: bom.data.bomId,
attrKey: this.newResult[i].attrKey,
attrValue: this.newResult[i].attrValue,
attrKey: this.selection[i].attrKey,
attrValue: this.selection[i].attrValue,
})
}
// 创建一个新的物料,使用原有物料的信息, code使用时间戳

View File

@@ -5,54 +5,85 @@
<el-step style="cursor: pointer;" @click.native="active = 1" title="质保单处理" icon="el-icon-edit" />
<el-step style="cursor: pointer;" @click.native="active = 2" title="质保单审核" icon="el-icon-check" />
</el-steps>
<div v-if="active === 0" style="padding: 30px;">
<file-upload v-model="uploadQualityCertificateForm.qualityCertificate" />
</div>
<div v-if="active === 0" style="padding: 30px;">
<file-upload v-model="uploadQualityCertificateForm.qualityCertificate" />
</div>
<div v-if="active === 1">
<div v-if="active === 1">
<el-row>
<el-alert title="质保单处理" type="info" />
</el-row>
<!-- 提取质保单信息,选择使用ocr还是使用大模型 -->
<el-row :gutter="20">
<el-col :span="12">
<div
style="border: 1px solid #ccc; border-radius: 10px; padding: 20px; cursor: pointer; text-align: center; position: relative;">
<!-- 齿轮图标绝对定位在左上角 -->
<i class="el-icon-setting"
style="position: absolute; left: 10px; top: 10px; font-size: 20px; color: #606266; cursor: pointer; z-index: 2;"
@click.stop="openKeyListDialog"></i>
<div style="margin-bottom: 10px; display: flex; align-items: center; justify-content: center;">
<span style="font-weight: bold; font-size: 16px;">OCR识别</span>
<el-tooltip content="通过OCR自动识别PDF质保单内容" placement="top">
<i class="el-icon-question" style="margin-left: 6px; color: #409EFF; cursor: pointer;"></i>
</el-tooltip>
</div>
<img @click="handleOcr" :src="ocrImage" alt="ocr" style="width: 120px; height: 120px; object-fit: contain;"
append-to-body />
</div>
</el-col>
<el-col :span="12">
<div style="border: 1px solid #ccc; border-radius: 10px; padding: 20px; cursor: pointer; text-align: center;">
<div style="margin-bottom: 10px; display: flex; align-items: center; justify-content: center;">
<span style="font-weight: bold; font-size: 16px;">大模型识别</span>
<el-tooltip content="通过AI大模型识别图片质保单内容" placement="top">
<i class="el-icon-question" style="margin-left: 6px; color: #409EFF; cursor: pointer;"></i>
</el-tooltip>
</div>
<img @click="handleModel" :src="modelImage" alt="model"
style="width: 120px; height: 120px; object-fit: contain;" />
</div>
</el-col>
</el-row>
</div>
<div v-if="active === 2">
<div v-if="resultDiff">
<merger :info="info" :old-result="oldResult" :new-result="newResult" @confirm="handleMergerConfirm" />
</div>
<div v-else>
<el-row>
<el-alert title="质保单处理" type="info" />
<el-alert title="请核对识别结果是否正确" type="success" />
</el-row>
<!-- 提取质保单信息,选择使用ocr还是使用大模型 -->
<el-row :gutter="20">
<el-col :span="12">
<div style="height: 300px; border: 1px solid #ccc; border-radius: 10px; padding: 20px; cursor: pointer;" @click="handleOcr">
<img :src="ocrImage" alt="ocr" style="height: 100%;" />
</div>
<el-table :data="newResult" style="width: 100%">
<el-table-column prop="attrKey" label="属性名称" />
<el-table-column prop="attrValue" label="属性值" />
</el-table>
</el-col>
<el-col :span="24">
<div style="height: 300px; border: 1px solid #ccc; border-radius: 10px; padding: 20px; cursor: pointer; display: flex; justify-content: center; align-items: center;" @click="handleModel">
<img :src="modelImage" alt="model" style="height: 100%;" />
<el-col :span="12">
<div>
<img style="width: 100%; height: 100%;" :src="file.url" alt="">
</div>
</el-col>
</el-row>
<el-button type="primary" @click="handleConfirm">确认</el-button>
</div>
</div>
<div v-if="active === 2">
<div v-if="resultDiff">
<merger :info="info" :old-result="oldResult" :new-result="newResult" @confirm="handleMergerConfirm" />
</div>
<div v-else>
<el-row>
<el-alert title="请核对识别结果是否正确" type="success" />
</el-row>
<el-row :gutter="20">
<el-col :span="12">
<el-table :data="newResult" style="width: 100%">
<el-table-column prop="attrKey" label="属性名称" />
<el-table-column prop="attrValue" label="属性值" />
</el-table>
</el-col>
<el-col :span="12">
<div>
<img style="width: 100%; height: 100%;" :src="file.url" alt="">
</div>
</el-col>
</el-row>
<el-button type="primary" @click="handleConfirm">确认</el-button>
</div>
<!-- Dialog for configuring the OCR key list (one field name per line) -->
<el-dialog title="配置OCR识别字段" :visible.sync="showKeyListDialog" width="400px" append-to-body>
  <div>
    <el-tag type="info" style="margin-bottom: 8px;">每行一个字段名顺序影响识别</el-tag>
    <!-- The placeholder is bound as a JS string expression so that "\n" is a real
         line break; in a static attribute it would be shown as a literal backslash-n. -->
    <el-input type="textarea" :rows="8" v-model="keyListInput" :placeholder="'如:订货单位\n合同号\n产品名称...'" />
  </div>
  <span slot="footer" class="dialog-footer">
    <el-button @click="showKeyListDialog = false">取消</el-button>
    <el-button type="primary" @click="saveKeyList">保存</el-button>
  </span>
</el-dialog>
</div>
</template>
@@ -60,7 +91,7 @@
import FileUpload from '@/components/FileUpload'
import { listByIds } from '@/api/system/oss'
import { updatePurchasePlanDetail } from '@/api/wms/purchasePlanDetail'
import { recognizeText, recognizeBomByModel } from '@/api/system/ocr'
import { recognizeText, recognizeBomByModel, recognizePdfText } from '@/api/system/ocr'
import { listBomItem } from '@/api/wms/bomItem'
import { getRawMaterial } from '@/api/wms/rawMaterial'
import Merger from './merger.vue'
@@ -81,8 +112,78 @@ const so = {
ocr: {
loading: '等待ocr识别结果',
handler: async (vm) => {
const text = await recognizeText({ imgUrl: vm.file.url })
return text;
/**
 * Extract key/value attribute pairs from the plain text returned by OCR.
 *
 * The list of known keys is read from the localStorage item 'ocrKeyList'
 * (one key per line) and falls back to a built-in default list. The first
 * non-blank line of the text is skipped (the original treats it as the company
 * name). Every other line is consumed left to right: while the remaining text
 * starts with a known key, the value is everything up to the next known key
 * (or the end of the line). Keys are tried in list order, so the order of the
 * key list affects the result.
 *
 * @param {string} text Raw OCR text; a non-string value is treated as empty text.
 * @returns {Array<{attrKey: string, attrValue: string}>} Pairs in order of appearance.
 */
function extractData(text) {
  // Prefer the key list configured in localStorage.
  let keyList = [];
  try {
    const local = localStorage.getItem('ocrKeyList');
    if (local) {
      keyList = local.split('\n').map(k => k.trim()).filter(k => k);
    }
  } catch {
    // Best effort: storage may be unavailable; fall through to the defaults.
  }
  // Default key list.
  if (!keyList.length) {
    keyList = [
      "订货单位", "合同号", "产品名称", "执行标准",
      "卷号", "原料坯号", "规格", "材质",
      "表面状态", "调制度", "切边要求", "包装种类",
      "毛重", "净重", "参考长度"
    ];
  }

  // Keep the numeric part of a weight value INCLUDING its decimal point.
  // Stripping every non-digit would turn "12.5kg" into "125".
  const toNumericText = (value) => {
    const match = value.replace(/[\s,]/g, "").match(/\d+(?:\.\d+)?/);
    return match ? match[0] : "";
  };

  // Guard against a missing OCR result so that `.replace` cannot throw.
  const source = typeof text === "string" ? text : "";

  // Pre-processing: re-join keys that OCR split with whitespace.
  const normalizedText = source
    .replace(/卷\s*号/g, "卷号")
    .replace(/原料\s*坯号/g, "原料坯号")
    .replace(/切边\s*要求/g, "切边要求")
    .replace(/包装\s*种类/g, "包装种类");

  // Trim every line: an indented line would otherwise never match
  // `startsWith(key)` and would be dropped without notice.
  const lines = normalizedText
    .split(/\r?\n/)
    .map(line => line.trim())
    .filter(line => line);

  const result = [];
  // Process the data lines (the first line is skipped as the company name).
  for (let i = 1; i < lines.length; i++) {
    let remaining = lines[i];
    // A single line may hold several key/value groups.
    while (remaining) {
      const key = keyList.find(k => remaining.startsWith(k));
      // No known key at the current position: give up on this line.
      if (!key) break;

      const afterKey = remaining.substring(key.length).trim();
      // The value ends where the nearest following key begins.
      let nextKeyIndex = afterKey.length;
      for (const k of keyList) {
        const idx = afterKey.indexOf(k);
        if (idx > -1 && idx < nextKeyIndex) {
          nextKeyIndex = idx;
        }
      }

      const value = afterKey.substring(0, nextKeyIndex).trim();
      result.push({
        attrKey: key,
        attrValue: key === "毛重" || key === "净重"
          ? toNumericText(value)
          : value
      });
      remaining = afterKey.substring(nextKeyIndex).trim();
    }
  }
  return result;
}
const { text } = (await recognizePdfText({ pdfUrl: vm.file.url })).data
console.log(text)
const result = extractData(text)
console.log(result)
vm.newResult = result;
return result;
}
},
model: {
@@ -140,7 +241,7 @@ const so = {
}
}
// 原子操作, 用于细化的进度展示, 无论其同步还是异步一律视作异步函数执行, 返回一Promise
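// Atomic operations used for fine-grained progress display; whether synchronous or asynchronous, each one is executed as an async function and returns a Promise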
const atoms = {
}
@@ -173,7 +274,7 @@ export default {
'uploadQualityCertificateForm.qualityCertificate': {
handler(newVal) {
if (newVal) {
this.loadingMethod('oss')
this.loadingMethod('oss')
}
this.loadingMethod('annex')
},
@@ -195,10 +296,17 @@ export default {
newResult: [],
modelImage,
ocrImage,
showKeyListDialog: false,
keyListInput: '',
}
},
methods: {
handleOcr() {
// 识别file是不是pdf, 只有pdf可以使用ocr
if (!this.file.url.endsWith('.pdf')) {
this.$modal.msgError('质保单不是pdf格式');
return;
}
this.loadingMethod('ocr', async (res) => {
await this.loadingMethod('old')
await this.loadingMethod('compare')
@@ -211,7 +319,7 @@ export default {
await this.loadingMethod('compare')
this.active = 2;
})
},
async handleConfirm() {
// 变更状态
@@ -259,7 +367,31 @@ export default {
}
}
this.resultDiff = false;
}
},
// 保存keyList到localStorage
saveKeyList() {
localStorage.setItem('ocrKeyList', this.keyListInput)
this.showKeyListDialog = false
this.$message.success('已保存OCR字段配置')
},
// 打开弹窗时初始化内容
openKeyListDialog() {
let local = ''
try {
local = localStorage.getItem('ocrKeyList') || ''
} catch { }
if (local) {
this.keyListInput = local
} else {
this.keyListInput = [
"订货单位", "合同号", "产品名称", "执行标准",
"卷号", "原料坯号", "规格", "材质",
"表面状态", "调制度", "切边要求", "包装种类",
"毛重", "净重", "参考长度"
].join('\n')
}
this.showKeyListDialog = true
},
}
}
</script>