|
@@ -4,11 +4,12 @@ import pickle
|
|
import pandas as pd
|
|
import pandas as pd
|
|
from flask_sqlalchemy.session import Session
|
|
from flask_sqlalchemy.session import Session
|
|
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
|
|
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
|
|
-from sklearn.metrics import r2_score
|
|
|
|
|
|
+from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
|
|
from sklearn.model_selection import train_test_split, cross_val_score
|
|
from sklearn.model_selection import train_test_split, cross_val_score
|
|
from sqlalchemy import text
|
|
from sqlalchemy import text
|
|
from xgboost import XGBRegressor
|
|
from xgboost import XGBRegressor
|
|
import logging
|
|
import logging
|
|
|
|
+import numpy as np
|
|
|
|
|
|
from .database_models import Models, Datasets
|
|
from .database_models import Models, Datasets
|
|
from .config import Config
|
|
from .config import Config
|
|
@@ -97,7 +98,7 @@ def calculate_model_score(model_info):
|
|
model_info: 模型信息对象
|
|
model_info: 模型信息对象
|
|
|
|
|
|
Returns:
|
|
Returns:
|
|
- float: 模型的R²评分
|
|
|
|
|
|
+ dict: 包含多种评分指标的字典
|
|
"""
|
|
"""
|
|
# 加载模型
|
|
# 加载模型
|
|
with open(model_info.ModelFilePath, 'rb') as f:
|
|
with open(model_info.ModelFilePath, 'rb') as f:
|
|
@@ -108,22 +109,55 @@ def calculate_model_score(model_info):
|
|
# 加载保存的 X_test 和 Y_test
|
|
# 加载保存的 X_test 和 Y_test
|
|
X_test = pd.read_csv('uploads/data/X_test_reflux.csv')
|
|
X_test = pd.read_csv('uploads/data/X_test_reflux.csv')
|
|
Y_test = pd.read_csv('uploads/data/Y_test_reflux.csv')
|
|
Y_test = pd.read_csv('uploads/data/Y_test_reflux.csv')
|
|
- print(X_test.columns) # 在测试时使用的数据的列名
|
|
|
|
|
|
+
|
|
|
|
+ # 预测测试集
|
|
y_pred = ML_model.predict(X_test)
|
|
y_pred = ML_model.predict(X_test)
|
|
|
|
+
|
|
|
|
+ # 计算各种评分指标
|
|
|
|
+ r2 = r2_score(Y_test, y_pred)
|
|
|
|
+ mae = mean_absolute_error(Y_test, y_pred)
|
|
|
|
+ rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
|
|
|
|
+
|
|
elif model_info.Data_type == 'reduce': # 降酸数据集
|
|
elif model_info.Data_type == 'reduce': # 降酸数据集
|
|
# 加载保存的 X_test 和 Y_test
|
|
# 加载保存的 X_test 和 Y_test
|
|
X_test = pd.read_csv('uploads/data/X_test_reduce.csv')
|
|
X_test = pd.read_csv('uploads/data/X_test_reduce.csv')
|
|
Y_test = pd.read_csv('uploads/data/Y_test_reduce.csv')
|
|
Y_test = pd.read_csv('uploads/data/Y_test_reduce.csv')
|
|
- print(X_test.columns) # 在测试时使用的数据的列名
|
|
|
|
|
|
+
|
|
|
|
+ # 预测测试集
|
|
y_pred = ML_model.predict(X_test)
|
|
y_pred = ML_model.predict(X_test)
|
|
-
|
|
|
|
-
|
|
|
|
- # 计算 R² 分数
|
|
|
|
- r2 = r2_score(Y_test, y_pred)
|
|
|
|
- return r2
|
|
|
|
|
|
+
|
|
|
|
+ # 计算各种评分指标
|
|
|
|
+ r2 = r2_score(Y_test, y_pred)
|
|
|
|
+ mae = mean_absolute_error(Y_test, y_pred)
|
|
|
|
+ rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
|
|
|
|
+
|
|
|
|
+ else:
|
|
|
|
+ # 不支持的数据类型
|
|
|
|
+ return {'r2': 0, 'mae': 0, 'rmse': 0}
|
|
|
|
+
|
|
|
|
+ # 返回所有评分指标(不包括交叉验证得分)
|
|
|
|
+ return {
|
|
|
|
+ 'r2': float(r2),
|
|
|
|
+ 'mae': float(mae),
|
|
|
|
+ 'rmse': float(rmse)
|
|
|
|
+ }
|
|
|
|
|
|
|
|
|
|
def train_and_save_model(session, model_type, model_name, model_description, data_type, dataset_id=None):
|
|
def train_and_save_model(session, model_type, model_name, model_description, data_type, dataset_id=None):
|
|
|
|
+ """
|
|
|
|
+ 训练并保存模型
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ session: 数据库会话
|
|
|
|
+ model_type: 模型类型
|
|
|
|
+ model_name: 模型名称
|
|
|
|
+ model_description: 模型描述
|
|
|
|
+ data_type: 数据类型 ('reflux' 或 'reduce')
|
|
|
|
+ dataset_id: 数据集ID
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ tuple: (模型名称, 模型ID, 数据集ID, 交叉验证得分)
|
|
|
|
+ """
|
|
try:
|
|
try:
|
|
if not dataset_id:
|
|
if not dataset_id:
|
|
# 创建新的数据集并复制数据,此过程将不立即提交
|
|
# 创建新的数据集并复制数据,此过程将不立即提交
|
|
@@ -163,17 +197,27 @@ def train_and_save_model(session, model_type, model_name, model_description, dat
|
|
|
|
|
|
# 训练模型
|
|
# 训练模型
|
|
model = train_model_by_type(X, y, model_type)
|
|
model = train_model_by_type(X, y, model_type)
|
|
-
|
|
|
|
|
|
+
|
|
|
|
+ # 计算交叉验证得分
|
|
|
|
+ cv_score = cross_val_score(model, X, y, cv=5).mean()
|
|
|
|
+
|
|
# 保存模型到数据库
|
|
# 保存模型到数据库
|
|
model_id = save_model(session, model, model_name, model_type, model_description, dataset_id, data_type)
|
|
model_id = save_model(session, model, model_name, model_type, model_description, dataset_id, data_type)
|
|
-
|
|
|
|
|
|
+
|
|
|
|
+ # 更新模型的交叉验证得分
|
|
|
|
+ model_info = session.query(Models).filter(Models.ModelID == model_id).first()
|
|
|
|
+ if model_info:
|
|
|
|
+ model_info.CV_score = float(cv_score)
|
|
|
|
+ session.commit()
|
|
|
|
+
|
|
# 所有操作成功后,手动提交事务
|
|
# 所有操作成功后,手动提交事务
|
|
session.commit()
|
|
session.commit()
|
|
- return model_name, model_id, dataset_id
|
|
|
|
|
|
+ return model_name, model_id, dataset_id, cv_score
|
|
|
|
+
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- # 如果在任何阶段出现异常,回滚事务
|
|
|
|
session.rollback()
|
|
session.rollback()
|
|
- raise e # 可选择重新抛出异常或处理异常
|
|
|
|
|
|
+ logging.error(f"训练和保存模型时发生错误: {str(e)}", exc_info=True)
|
|
|
|
+ raise
|
|
|
|
|
|
|
|
|
|
|
|
|