Files
pickling-mes/backend/train_models.py
wangyu f5c59db92b feat(prediction): 三层校准体系 + 按钢种分组 + 数据飞轮
1. 按钢种分组 K_cal:cal_coeffs.json 升级为嵌套结构,
   {kcal: {model: {_default, Q235, ...}}, phys: {...}},
   旧平铺格式首次加载时自动迁移。

2. 物理参数自适应:EA_R/K0/N_CONC 按钢种网格拟合
   (7×5×3=105 组合),每次校准追加样本到
   production_samples.jsonl,≥10 条后自动触发拟合。

3. 数据飞轮:新增 POST /retrain 端点,后台子进程跑
   train_models.py --use-real-data 混入实绩重训
   (10× 权重),完成后 ONNX 热重载,无需重启服务。

新增端点:
  GET  /calibration/samples         样本数统计
  GET  /calibration/phys-params     物理参数查询
  POST /calibration/fit-phys/{key}  手动触发物理参数拟合
  POST /retrain                     启动重训
  GET  /retrain/status              重训进度

模型类签名变更:
  TensionModel / QualityPredictionModel 新增 steel_grade 参数
  AcidConsumptionModel 新增 fe_conc_avg 参数

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-01 16:13:39 +08:00

331 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
本地训练脚本 — 生成合成数据、训练 MLP、导出 ONNX
运行方式(在 backend/ 目录下):
python train_models.py # 纯合成数据
python train_models.py --use-real-data # 混入生产实绩(10× 权重)
依赖(仅本地训练用,不进 Docker)
pip install torch onnx onnxruntime scikit-learn numpy
"""
import sys, json, time, argparse
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
sys.path.insert(0, str(Path(__file__).parent))
from app.services.prediction import (
AcidSpeedModel, TensionModel, QualityPredictionModel,
_SAMPLE_FILE, get_sample_stats,
)
# Output directory for the exported ONNX models and scalers.json
# (app/services/pt_models next to this script); created at import time if missing.
PT_DIR = Path(__file__).parent / "app" / "services" / "pt_models"
PT_DIR.mkdir(parents=True, exist_ok=True)
# Base seed: seeds the global NumPy / torch RNGs below and is also the base
# for the per-model data generators (SEED, SEED + 1, SEED + 2).
SEED = 2024
# Number of synthetic samples generated for each model.
N = 12000
np.random.seed(SEED)
torch.manual_seed(SEED)
# Zone names in the column order of the tension model's 10 outputs;
# also written to scalers.json as "zone_names".
TENSION_ZONES = [
"inlet", "s1_roller", "acid_entry",
"acid1", "acid2", "acid3",
"rinse", "leveler", "s2_roller", "outlet",
]
REAL_SAMPLE_WEIGHT = 10 # number of times each real sample is replicated (equivalent weight)
# ─── 网络结构 ───────────────────────────────────────────────────────────────
class MLP(nn.Module):
    """Feed-forward regressor: one Linear+ReLU pair per hidden width, then a linear head."""

    def __init__(self, in_dim: int, out_dim: int, hidden=(128, 64, 32)):
        super().__init__()
        widths = [in_dim, *hidden]
        stack: list = []
        # Layers are instantiated strictly front-to-back so that parameter
        # initialisation draws from the torch RNG in a fixed order.
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        stack.append(nn.Linear(widths[-1], out_dim))
        self.net = nn.Sequential(*stack)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the sequential stack."""
        return self.net(x)
# ─── 训练通用函数 ───────────────────────────────────────────────────────────
def fit(model: nn.Module, X: np.ndarray, y: np.ndarray,
        epochs=300, lr=1e-3, batch_size=512) -> nn.Module:
    """Train ``model`` in place with minibatch MSE regression and return it.

    Optimisation uses AdamW (weight decay 1e-4) with a cosine-annealed
    learning rate that is stepped once per epoch. On every 100th epoch the
    sample-weighted RMSE accumulated during that epoch is printed.

    Args:
        model: Network to optimise; it is left in training mode.
        X: Input array, converted with ``torch.from_numpy``.
        y: Target array, converted with ``torch.from_numpy``.
        epochs: Number of passes over the data.
        lr: Initial learning rate.
        batch_size: Minibatch size; batches are reshuffled every epoch.

    Returns:
        The same ``model`` object that was passed in.
    """
    inputs = torch.from_numpy(X)
    targets = torch.from_numpy(y)
    loader = DataLoader(
        TensorDataset(inputs, targets),
        batch_size=batch_size,
        shuffle=True,
    )
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
    criterion = nn.MSELoss()

    model.train()
    for ep in range(1, epochs + 1):
        sq_err_sum = 0.0
        for batch_x, batch_y in loader:
            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()
            # Weight by batch length so a short final batch is not over-counted.
            sq_err_sum += batch_loss.item() * len(batch_x)
        scheduler.step()
        if ep % 100 != 0:
            continue
        rmse = (sq_err_sum / len(inputs)) ** 0.5
        print(f" ep {ep:3d}/{epochs} RMSE={rmse:.5f}")
    return model
def z_scale(arr: np.ndarray, mean=None, std=None):
    """Standardise ``arr`` along axis 0 and return ``(scaled, mean, std)``.

    Args:
        arr: Array to standardise; statistics are taken along axis 0.
        mean: Mean to subtract. Computed from ``arr`` when None.
        std: Standard deviation to divide by. When None it is computed from
            ``arr`` with 1e-8 added so constant columns do not divide by zero.

    Returns:
        A tuple of the scaled array cast to float32, and the mean and std
        that were actually applied.
    """
    # Resolve the two statistics independently: a caller-supplied ``std`` is
    # honoured even when ``mean`` is omitted, and supplying only ``mean`` no
    # longer ends up dividing by None.
    if mean is None:
        mean = arr.mean(axis=0)
    if std is None:
        std = arr.std(axis=0) + 1e-8
    return ((arr - mean) / std).astype(np.float32), mean, std
def export_onnx(model: nn.Module, in_dim: int, path: Path):
    """Export ``model`` to an ONNX file at ``path`` with a dynamic batch axis.

    The model is switched to eval mode and exported with a zero tensor of
    shape (1, in_dim) as the example input. The graph's tensors are named
    "input" and "output". Afterwards the file name and its size in KiB are
    printed.
    """
    model.eval()
    example = torch.zeros(1, in_dim)
    io_names = ("input", "output")
    torch.onnx.export(
        model,
        example,
        str(path),
        input_names=[io_names[0]],
        output_names=[io_names[1]],
        # Axis 0 of both tensors stays symbolic so inference can use any batch size.
        dynamic_axes={tensor_name: {0: "batch"} for tensor_name in io_names},
        opset_version=17,
    )
    size_kb = path.stat().st_size // 1024
    print(f"{path.name} ({size_kb} KB)")
# ─── 读取生产实绩样本 ────────────────────────────────────────────────────────
# Numeric pickling-index target assigned to each recorded quality grade.
_GRADE_TO_PI = {"A1": 95.0, "A2": 85.0, "B1": 75.0, "B2": 65.0, "C": 50.0}


def _finite_floats(values, count: int) -> list:
    """Return the first ``count`` items of ``values`` as finite floats, else raise ValueError."""
    head = [float(v) for v in values[:count]]
    if len(head) != count:
        raise ValueError(f"expected {count} values, got {len(head)}")
    if not np.isfinite(head).all():
        raise ValueError("non-finite value in sample")
    return head


def _acid_speed_sample(rec: dict, inp):
    """Build (features, target) for one acid-speed record, or None if it has no measured speed."""
    spd = rec.get("actual_speed")
    if spd is None:
        return None
    return _finite_floats(inp, 14), _finite_floats([spd], 1)


def _tension_sample(rec: dict, inp):
    """Build (features, target) for one tension record, or None if it is not usable."""
    kn = rec.get("actual_kn")
    if kn is None:
        return None
    zone = rec.get("zone")
    if zone not in TENSION_ZONES:
        return None
    feats = _finite_floats(inp, 4)
    # A record measures a single zone only: the other nine targets are
    # filled with the analytical model's own prediction for these inputs.
    res = TensionModel(inp[0], inp[1], inp[2], inp[3]).calculate()
    tensions = [res["zones"][z]["tension_kN"] for z in TENSION_ZONES]
    tensions[TENSION_ZONES.index(zone)] = kn
    return feats, _finite_floats(tensions, len(TENSION_ZONES))


def _quality_sample(rec: dict, inp):
    """Build (features, target) for one quality record, or None if it has no grade."""
    grade = rec.get("actual_grade")
    if grade is None:
        return None
    # Unknown grades fall back to 75.0; the same score is used for both
    # outputs (pickling index as a proxy for the surface score).
    target_pi = _GRADE_TO_PI.get(grade, 75.0)
    return _finite_floats(inp, 6), [target_pi, target_pi]


_SAMPLE_BUILDERS = {
    "acid_speed": _acid_speed_sample,
    "tension": _tension_sample,
    "quality": _quality_sample,
}


def load_real_samples(model_name: str):
    """Read the production samples recorded for ``model_name``.

    Each line of the JSONL file at ``_SAMPLE_FILE`` is parsed as one record.
    Records whose "model" field differs from ``model_name``, or that lack
    inputs or the measured value, are ignored. Records that cannot be
    parsed, have too few inputs, or contain non-numeric / non-finite values
    are skipped and counted, so one bad line cannot abort the whole
    training run.

    Args:
        model_name: One of "acid_speed", "tension" or "quality".

    Returns:
        ``(X_real, y_real)`` as float32 arrays, or ``(None, None)`` when the
        file is missing, the model name is unknown, or no usable record
        exists.
    """
    if not _SAMPLE_FILE.exists():
        return None, None
    build = _SAMPLE_BUILDERS.get(model_name)
    if build is None:
        return None, None

    Xs, ys = [], []
    skipped = 0
    # errors="replace" confines a stray undecodable byte to its own line,
    # which then fails JSON parsing and is counted as skipped.
    with open(_SAMPLE_FILE, encoding="utf-8", errors="replace") as f:
        for line in f:
            if not line.strip():
                continue
            try:
                rec = json.loads(line)
                if rec.get("model") != model_name:
                    continue
                inp = rec.get("inputs")
                if not inp:
                    continue
                sample = build(rec, inp)
            except Exception:
                # Deliberately broad: the project model invoked for tension
                # records is opaque here, and a bad record must never stop
                # the retraining job. The record is counted, not hidden.
                skipped += 1
                continue
            if sample is None:
                continue
            Xs.append(sample[0])
            ys.append(sample[1])

    if skipped:
        print(f" 跳过无效实绩记录: {model_name} = {skipped} 条")
    if not Xs:
        return None, None
    print(f" 实绩样本: {model_name} = {len(Xs)} 条 (将按 {REAL_SAMPLE_WEIGHT}× 权重混入)")
    return np.array(Xs, np.float32), np.array(ys, np.float32)
def mix_with_real(X_syn: np.ndarray, y_syn: np.ndarray,
                  X_real, y_real, weight=None) -> tuple:
    """Append replicated real samples after the synthetic data.

    Args:
        X_syn: Synthetic inputs, one row per sample.
        y_syn: Synthetic targets, one row per sample.
        X_real: Real inputs, or None / empty when there are none.
        y_real: Real targets matching ``X_real`` row for row.
        weight: Number of times the real block is repeated. Defaults to the
            module-level ``REAL_SAMPLE_WEIGHT`` when None.

    Returns:
        ``(X, y)``: the synthetic arrays unchanged when there are no real
        samples, otherwise new arrays with the real block tiled ``weight``
        times and concatenated at the end.

    Raises:
        ValueError: If ``X_real`` and ``y_real`` differ in row count.
    """
    if X_real is None or len(X_real) == 0:
        return X_syn, y_syn
    # Fail here with a clear message instead of later, inside the dataset
    # constructor, where the cause is much harder to trace.
    if y_real is None or len(y_real) != len(X_real):
        raise ValueError("X_real and y_real must have the same number of rows")
    reps = REAL_SAMPLE_WEIGHT if weight is None else int(weight)
    X_r = np.tile(X_real, (reps, 1))
    y_r = np.tile(y_real, (reps, 1))
    return np.concatenate([X_syn, X_r], axis=0), np.concatenate([y_syn, y_r], axis=0)
# ─── 1. 酸洗速度模型 ────────────────────────────────────────────────────────
# 输入(14): thickness, scale_weight, conc×6, temp×6
# 输出(1): max_speed
def gen_acid_speed(n: int):
    """Generate ``n`` synthetic samples for the acid-speed network.

    Each feature row has 14 values: thickness, scale weight, six acid
    concentrations and six acid temperatures. The target is the
    ``max_speed`` returned by ``AcidSpeedModel`` (evaluated with width
    1000.0 and steel grade "Q235"), multiplied by a randomly chosen steel
    factor and Gaussian noise, then clipped to [20, 180]. The sampled target
    pickling index is passed to the model but is not part of the features.

    Args:
        n: Number of samples to produce.

    Returns:
        ``(X, y)`` as float32 arrays.

    Raises:
        RuntimeError: If the analytical model fails 1000 times in a row,
            which indicates a systematic error, not an unlucky draw.
    """
    rng = np.random.default_rng(SEED)
    Xs, ys = [], []
    skip = 0
    failures_in_a_row = 0
    max_failures_in_a_row = 1000
    while len(Xs) < n:
        t = rng.uniform(0.5, 8.0)
        sw = rng.uniform(4.0, 18.0)
        conc = rng.uniform(60, 240, 6).tolist()
        temp = rng.uniform(52, 87, 6).tolist()
        tpi = rng.uniform(88, 97)
        try:
            m = AcidSpeedModel(thickness=t, width=1000.0, steel_grade="Q235",
                               acid_conc_list=conc, acid_temp_list=temp,
                               scale_weight=sw, target_pi=tpi)
            spd = float(m.calculate()["max_speed"])
        except Exception as exc:
            skip += 1
            failures_in_a_row += 1
            # Without this bound a model that always raises (for example
            # after a signature change) would keep this loop spinning forever.
            if failures_in_a_row >= max_failures_in_a_row:
                raise RuntimeError(
                    f"AcidSpeedModel failed {failures_in_a_row} times in a row "
                    f"after {len(Xs)} samples"
                ) from exc
            continue
        failures_in_a_row = 0
        steel_factor = rng.choice([0.92, 0.96, 1.00, 1.03, 1.06])
        noise = rng.normal(1.0, 0.06)
        spd_n = float(np.clip(spd * noise * steel_factor, 20, 180))
        Xs.append([t, sw] + conc + temp)
        ys.append([spd_n])
    print(f" 合成样本: acid_speed = {len(Xs)} 条 (skipped {skip})")
    return np.array(Xs, np.float32), np.array(ys, np.float32)
# ─── 2. 张力模型 ────────────────────────────────────────────────────────────
# 输入(4): thickness, width, yield_strength, tension_coef
# 输出(10): 10 区段张力 kN
def gen_tension(n: int):
    """Generate ``n`` synthetic samples for the tension network.

    Features are thickness, width, yield strength and tension coefficient.
    Targets are the ``tension_kN`` values that ``TensionModel`` reports for
    each zone in ``TENSION_ZONES`` order, each multiplied by its own
    Gaussian noise factor (mean 1.0, sigma 0.04) and clipped to [0.1, 9999].

    Returns:
        ``(X, y)`` as float32 arrays.
    """
    rng = np.random.default_rng(SEED + 1)
    features, targets = [], []
    for _ in range(n):
        thickness = rng.uniform(0.5, 8.0)
        width = rng.uniform(600, 1600)
        yield_strength = rng.uniform(150, 600)
        coef = rng.uniform(0.15, 0.35)
        result = TensionModel(
            thickness=thickness,
            width=width,
            yield_strength=yield_strength,
            tension_coef=coef,
        ).calculate()
        clean = [result["zones"][zone]["tension_kN"] for zone in TENSION_ZONES]
        # One independent noise factor per zone, drawn after the model call.
        jitter = rng.normal(1.0, 0.04, 10)
        noisy = [
            float(np.clip(value * jitter[k], 0.1, 9999))
            for k, value in enumerate(clean)
        ]
        features.append([thickness, width, yield_strength, coef])
        targets.append(noisy)
    print(f" 合成样本: tension = {len(features)}")
    return np.array(features, np.float32), np.array(targets, np.float32)
# ─── 3. 质量预测模型 ─────────────────────────────────────────────────────────
# 输入(6): thickness, avg_speed, acid_conc_avg, acid_temp_avg, scale_weight, fe_conc_avg
# 输出(2): pi_score, surface_score
def gen_quality(n: int):
    """Generate ``n`` synthetic samples for the quality network.

    Features are thickness, average speed, average acid concentration,
    average acid temperature, scale weight and average Fe concentration.
    Targets are the ``pi_score`` and ``surface_score`` returned by
    ``QualityPredictionModel``, each multiplied by its own Gaussian noise
    factor (mean 1.0, sigma 0.06) and clipped to [0, 100].

    Returns:
        ``(X, y)`` as float32 arrays.
    """
    rng = np.random.default_rng(SEED + 2)
    features, targets = [], []
    for _ in range(n):
        thickness = rng.uniform(0.5, 8.0)
        speed = rng.uniform(20, 180)
        acid_conc = rng.uniform(60, 240)
        acid_temp = rng.uniform(50, 90)
        scale_w = rng.uniform(4, 18)
        fe_conc = rng.uniform(20, 130)
        result = QualityPredictionModel(
            thickness=thickness,
            avg_speed=speed,
            acid_conc_avg=acid_conc,
            acid_temp_avg=acid_temp,
            scale_weight=scale_w,
            fe_conc_avg=fe_conc,
        ).calculate()
        # Noise is drawn for the pickling index first, then for the surface score.
        noisy_pi = float(np.clip(result["pi_score"] * rng.normal(1.0, 0.06), 0, 100))
        noisy_surface = float(
            np.clip(result["surface_score"] * rng.normal(1.0, 0.06), 0, 100)
        )
        features.append([thickness, speed, acid_conc, acid_temp, scale_w, fe_conc])
        targets.append([noisy_pi, noisy_surface])
    print(f" 合成样本: quality = {len(features)}")
    return np.array(features, np.float32), np.array(targets, np.float32)
# ─── 主流程 ─────────────────────────────────────────────────────────────────
def _train_and_export(name: str, X: np.ndarray, y: np.ndarray,
                      in_dim: int, out_dim: int, hidden: tuple) -> dict:
    """Standardise the data, train one MLP, export ``<name>.onnx`` and return its scaler stats."""
    Xn, x_mean, x_std = z_scale(X)
    yn, y_mean, y_std = z_scale(y)
    model = MLP(in_dim, out_dim, hidden=hidden)
    print(" 训练中...")
    fit(model, Xn, yn, epochs=300)
    export_onnx(model, in_dim, PT_DIR / f"{name}.onnx")
    return {
        "X_mean": x_mean.tolist(), "X_std": x_std.tolist(),
        "y_mean": y_mean.tolist(), "y_std": y_std.tolist(),
    }


def main(use_real_data: bool = False):
    """Train the three networks, export them to ONNX and write ``scalers.json``.

    For each of the acid-speed, tension and quality models, in that order,
    this generates ``N`` synthetic samples, optionally mixes in the recorded
    production samples, trains an MLP on standardised data, and exports it
    into ``PT_DIR``. The standardisation statistics of all three models are
    then saved together as ``scalers.json`` in the same directory.

    Args:
        use_real_data: When True, print the production-sample statistics
            and mix the real samples into every training set.
    """
    scalers: dict = {}
    t0 = time.time()
    if use_real_data:
        stats = get_sample_stats()
        print(f"\n生产实绩样本统计: {stats}")

    # One row per model: banner, scaler key / file stem, data generator,
    # input width, output width, hidden layer widths.
    jobs = [
        ("\n[1/3] 酸洗速度模型", "acid_speed", gen_acid_speed, 14, 1, (128, 64, 32)),
        ("\n[2/3] 张力模型", "tension", gen_tension, 4, 10, (64, 64, 32)),
        ("\n[3/3] 质量预测模型", "quality", gen_quality, 6, 2, (64, 32)),
    ]
    for banner, key, generate, in_dim, out_dim, hidden in jobs:
        print(banner)
        X, y = generate(N)
        if use_real_data:
            X, y = mix_with_real(X, y, *load_real_samples(key))
        scalers[key] = _train_and_export(key, X, y, in_dim, out_dim, hidden)

    # The tension entry also records which zone each output column refers to.
    scalers["tension"]["zone_names"] = TENSION_ZONES

    scaler_path = PT_DIR / "scalers.json"
    with open(scaler_path, "w", encoding="utf-8") as f:
        json.dump(scalers, f, indent=2)
    print(f"\n scalers → {scaler_path.name}")
    print(f"\n完成 ({time.time()-t0:.1f}s)\n")
if __name__ == "__main__":
    # Command-line entry point; the single switch enables mixing the
    # recorded production samples into the training sets.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--use-real-data",
        action="store_true",
        help="将 production_samples.jsonl 中的实绩混入训练集",
    )
    options = cli.parse_args()
    main(use_real_data=options.use_real_data)