""" 本地训练脚本 — 生成合成数据、训练 MLP、导出 ONNX 运行方式(在 backend/ 目录下): python train_models.py 依赖(仅本地训练用,不进 Docker): pip install torch onnx onnxruntime scikit-learn numpy """ import sys, json, time from pathlib import Path import numpy as np import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import DataLoader, TensorDataset sys.path.insert(0, str(Path(__file__).parent)) from app.services.prediction import AcidSpeedModel, TensionModel, QualityPredictionModel PT_DIR = Path(__file__).parent / "app" / "services" / "pt_models" PT_DIR.mkdir(parents=True, exist_ok=True) SEED = 2024 N = 12000 np.random.seed(SEED) torch.manual_seed(SEED) TENSION_ZONES = [ "inlet", "s1_roller", "acid_entry", "acid1", "acid2", "acid3", "rinse", "leveler", "s2_roller", "outlet", ] # ─── 网络结构 ─────────────────────────────────────────────────────────────── class MLP(nn.Module): def __init__(self, in_dim: int, out_dim: int, hidden=(128, 64, 32)): super().__init__() layers: list = [] prev = in_dim for h in hidden: layers += [nn.Linear(prev, h), nn.ReLU()] prev = h layers.append(nn.Linear(prev, out_dim)) self.net = nn.Sequential(*layers) def forward(self, x: torch.Tensor) -> torch.Tensor: return self.net(x) # ─── 训练通用函数 ─────────────────────────────────────────────────────────── def fit(model: nn.Module, X: np.ndarray, y: np.ndarray, epochs=300, lr=1e-3, batch_size=512) -> nn.Module: Xt = torch.from_numpy(X) yt = torch.from_numpy(y) dl = DataLoader(TensorDataset(Xt, yt), batch_size=batch_size, shuffle=True) opt = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4) sched = optim.lr_scheduler.CosineAnnealingLR(opt, epochs) loss_fn = nn.MSELoss() model.train() for ep in range(1, epochs + 1): tot = 0.0 for xb, yb in dl: opt.zero_grad() loss = loss_fn(model(xb), yb) loss.backward() opt.step() tot += loss.item() * len(xb) sched.step() if ep % 100 == 0: print(f" ep {ep:3d}/{epochs} RMSE={((tot/len(Xt))**0.5):.5f}") return model def z_scale(arr: np.ndarray, mean=None, std=None): if 
mean is None: mean = arr.mean(axis=0) std = arr.std(axis=0) + 1e-8 return ((arr - mean) / std).astype(np.float32), mean, std def export_onnx(model: nn.Module, in_dim: int, path: Path): model.eval() dummy = torch.zeros(1, in_dim) torch.onnx.export( model, dummy, str(path), input_names=["input"], output_names=["output"], dynamic_axes={"input": {0: "batch"}, "output": {0: "batch"}}, opset_version=17, ) print(f" → {path.name} ({path.stat().st_size//1024} KB)") # ─── 1. 酸洗速度模型 ──────────────────────────────────────────────────────── # 输入(14): thickness, scale_weight, conc×6, temp×6 # 输出(1): max_speed def gen_acid_speed(n: int): rng = np.random.default_rng(SEED) Xs, ys = [], [] skip = 0 while len(Xs) < n: t = rng.uniform(0.5, 8.0) sw = rng.uniform(4.0, 18.0) conc = rng.uniform(60, 240, 6).tolist() temp = rng.uniform(52, 87, 6).tolist() tpi = rng.uniform(88, 97) try: m = AcidSpeedModel( thickness=t, width=1000.0, steel_grade="Q235", acid_conc_list=conc, acid_temp_list=temp, scale_weight=sw, target_pi=tpi, ) spd = float(m.calculate()["max_speed"]) except Exception: skip += 1 continue # 模拟真实工况偏差:±6% 相对噪声 + 钢种系数扰动 steel_factor = rng.choice([0.92, 0.96, 1.00, 1.03, 1.06]) noise = rng.normal(1.0, 0.06) spd_n = float(np.clip(spd * noise * steel_factor, 20, 180)) Xs.append([t, sw] + conc + temp) ys.append([spd_n]) print(f" acid_speed: {len(Xs)} samples (skipped {skip})") return np.array(Xs, np.float32), np.array(ys, np.float32) # ─── 2. 
# ─── 2. Tension model ───────────────────────────────────────────────────────
# Inputs (4): thickness, width, yield_strength, tension_coef
# Outputs (10): tension in kN for each of the 10 zones
def gen_tension(n: int):
    """Generate ``n`` noisy (features, zone tensions) samples from ``TensionModel``.

    Args:
        n: Number of samples to produce.

    Returns:
        Tuple ``(X, y)`` of float32 arrays; each row of ``X`` is
        ``[thickness, width, yield_strength, tension_coef]`` and each row of
        ``y`` holds one tension per entry of ``TENSION_ZONES``, in that order.
    """
    rng = np.random.default_rng(SEED + 1)
    Xs, ys = [], []
    for _ in range(n):
        t = rng.uniform(0.5, 8.0)
        w = rng.uniform(600, 1600)
        ys_ = rng.uniform(150, 600)
        tc = rng.uniform(0.15, 0.35)
        m = TensionModel(thickness=t, width=w, yield_strength=ys_, tension_coef=tc)
        res = m.calculate()
        tensions = [res["zones"][z]["tension_kN"] for z in TENSION_ZONES]
        # Independent noise per zone (measured tension-sensor accuracy is
        # roughly ±4%). One draw per zone keeps this in sync with TENSION_ZONES.
        noise = rng.normal(1.0, 0.04, len(TENSION_ZONES))
        tensions_n = [
            float(np.clip(v * k, 0.1, 9999)) for v, k in zip(tensions, noise)
        ]
        Xs.append([t, w, ys_, tc])
        ys.append(tensions_n)
    print(f" tension: {len(Xs)} samples")
    return np.array(Xs, np.float32), np.array(ys, np.float32)


# ─── 3. Quality prediction model ────────────────────────────────────────────
# Inputs (6): thickness, avg_speed, acid_conc_avg, acid_temp_avg, scale_weight, fe_conc_avg
# Outputs (2): pi_score, surface_score
def gen_quality(n: int):
    """Generate ``n`` noisy (features, scores) samples from ``QualityPredictionModel``.

    Args:
        n: Number of samples to produce.

    Returns:
        Tuple ``(X, y)`` of float32 arrays; each row of ``X`` is
        ``[thickness, avg_speed, acid_conc_avg, acid_temp_avg, scale_weight,
        fe_conc_avg]`` and each row of ``y`` is ``[pi_score, surface_score]``,
        both clipped to ``[0, 100]`` after noise is applied.
    """
    rng = np.random.default_rng(SEED + 2)
    Xs, ys = [], []
    for _ in range(n):
        t = rng.uniform(0.5, 8.0)
        spd = rng.uniform(20, 180)
        conc = rng.uniform(60, 240)
        temp = rng.uniform(50, 90)
        sw = rng.uniform(4, 18)
        fe = rng.uniform(20, 130)
        m = QualityPredictionModel(
            thickness=t,
            avg_speed=spd,
            acid_conc_avg=conc,
            acid_temp_avg=temp,
            scale_weight=sw,
            fe_conc_avg=fe,
        )
        res = m.calculate()
        pi = res["pi_score"]
        suf = res["surface_score"]
        # ±6% noise simulating quality-inspection measurement uncertainty.
        # The two draws stay in this order so the random stream is stable.
        pi_n = float(np.clip(pi * rng.normal(1.0, 0.06), 0, 100))
        suf_n = float(np.clip(suf * rng.normal(1.0, 0.06), 0, 100))
        Xs.append([t, spd, conc, temp, sw, fe])
        ys.append([pi_n, suf_n])
    print(f" quality: {len(Xs)} samples")
    return np.array(Xs, np.float32), np.array(ys, np.float32)


# ─── Main pipeline ──────────────────────────────────────────────────────────
def _train_and_export(title, generate, in_dim, out_dim, hidden, onnx_name):
    """Generate data, train one MLP, export it to ONNX, and return its scalers.

    Args:
        title: Banner line printed before the stage starts.
        generate: Callable taking a sample count and returning ``(X, y)``.
        in_dim: Input feature count of the network.
        out_dim: Output count of the network.
        hidden: Hidden-layer widths passed to ``MLP``.
        onnx_name: File name of the exported model inside ``PT_DIR``.

    Returns:
        Dict with JSON-serialisable ``X_mean``, ``X_std``, ``y_mean`` and
        ``y_std`` lists used to standardise inputs and targets.
    """
    print(title)
    X, y = generate(N)
    Xn, x_mean, x_std = z_scale(X)
    yn, y_mean, y_std = z_scale(y)
    # Build the model only after the data exists, matching the original order
    # in which the global torch RNG is consumed.
    model = MLP(in_dim, out_dim, hidden=hidden)
    print(" 训练中...")
    fit(model, Xn, yn, epochs=300)
    export_onnx(model, in_dim, PT_DIR / onnx_name)
    return {
        "X_mean": x_mean.tolist(),
        "X_std": x_std.tolist(),
        "y_mean": y_mean.tolist(),
        "y_std": y_std.tolist(),
    }


def main():
    """Train and export the three models, then write ``scalers.json`` to ``PT_DIR``."""
    scalers: dict = {}
    t0 = time.time()

    # ── Pickling (acid) speed ──
    scalers["acid_speed"] = _train_and_export(
        "\n[1/3] 酸洗速度模型", gen_acid_speed, 14, 1, (128, 64, 32), "acid_speed.onnx"
    )

    # ── Tension ──
    scalers["tension"] = _train_and_export(
        "\n[2/3] 张力模型", gen_tension, 4, 10, (64, 64, 32), "tension.onnx"
    )
    # Stored with the scalers so consumers can label the 10 outputs.
    scalers["tension"]["zone_names"] = TENSION_ZONES

    # ── Quality ──
    scalers["quality"] = _train_and_export(
        "\n[3/3] 质量预测模型", gen_quality, 6, 2, (64, 32), "quality.onnx"
    )

    # ── Save scaler parameters ──
    scaler_path = PT_DIR / "scalers.json"
    # Explicit encoding so the file does not depend on the platform default.
    with open(scaler_path, "w", encoding="utf-8") as f:
        json.dump(scalers, f, indent=2)
    print(f"\n scalers → {scaler_path.name}")
    print(f"\n完成 ({time.time()-t0:.1f}s)\n")


if __name__ == "__main__":
    main()