
Merge api files

DIng, 2 months ago
Parent
Current commit
31a050e76a

+ 2 - 1
api/.gitignore

@@ -1,4 +1,5 @@
 app/__pycache__
 .idea
 model_optimize/__pycache__
-__pycache__
+__pycache__
+.vscode

+ 43 - 14
api/README.md

@@ -17,6 +17,28 @@
    conda activate your_env_name
    ```
 
+### 安装并配置Redis
+
+1. 下载并安装Redis(Celery依赖):
+   - Windows用户: 从[https://github.com/tporadowski/redis/releases](https://github.com/tporadowski/redis/releases)下载Redis 5.0.x版本
+   - Linux用户: `sudo apt-get install redis-server`
+   - macOS用户: `brew install redis`
+
+2. 版本要求:
+   - 推荐使用Redis 5.0及以上版本
+   - 最低支持版本: Redis 4.0
+   - 项目已在Redis 5.0.14.1上测试通过
+
+3. 启动Redis服务:
+   - Windows: `redis-server.exe redis.windows.conf`
+   - Linux/macOS: `redis-server`
+
+4. 验证Redis是否正常运行:
+   ```bash
+   redis-cli ping
+   ```
+   如果返回`PONG`,则表示Redis服务正常运行。
+
 ## 项目结构
 
 - `app/`: 包含Flask应用的主要代码。
@@ -50,31 +72,38 @@
 
 ### 启动应用
 
-1. 激活conda环境:
+1. 确保Redis服务正在运行。
+
+2. 激活conda环境:
    ```bash
    conda activate your_env_name
    ```
 
-2. 启动Flask应用:
+3. 启动Flask应用:
    ```bash
    python run.py
    ```
 
-3. 启动Celery:
+4. 启动Celery:
    ```bash
-   celery -A app.celery_app.celery worker --loglevel=info
+   watchfiles --filter python "celery -A app.celery_app.celery worker --loglevel=info" .
+   ```
+   > 说明:执行上述命令需要先安装 watchfiles:
+   ```bash
+   pip install watchfiles
    ```
 
-### API接口
-
-- `/download_template`: 下载数据模板。
-- `/import_data`: 导入数据。
-- `/export_data`: 导出数据。
-- `/add_item`: 添加记录。
 
 ## 配置
 
-在`app/config.py`中可以修改数据库路径、上传文件夹路径等配置。
+在`app/config.py`中可以修改数据库路径、上传文件夹路径等配置。默认配置包括:
+
+- 数据库路径: `SoilAcidification.db`
+- 上传文件夹: `uploads/datasets`
+- 模型保存路径: `pkl`
+- Celery配置:
+  - Broker URL: `redis://localhost:6379/0`
+  - Result Backend: `redis://localhost:6379/0`
 
 ## 依赖
 
@@ -83,10 +112,10 @@
 - Flask
 - SQLAlchemy
 - Celery
+- Redis
 - Pandas
 - Scikit-learn
 - XGBoost
+- Watchfiles
 
-## 许可证
-
-本项目遵循MIT许可证。
+## 许可证
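A quick way to sanity-check the Redis/Celery wiring described in this README is sketched below with the `redis` and `celery` Python packages. The broker/backend URL follows the defaults listed above; the throwaway `Celery("healthcheck")` app exists only for this check and is not the project's `app.celery_app` module.

```python
# Connectivity check for the default broker redis://localhost:6379/0 (assumed from the README).
import redis
from celery import Celery

r = redis.Redis(host="localhost", port=6379, db=0)
print("Redis ping:", r.ping())  # True when redis-server is reachable

# Throwaway Celery app pointed at the same broker/backend; the real worker
# is started from app.celery_app with the watchfiles command shown above.
celery_app = Celery("healthcheck",
                    broker="redis://localhost:6379/0",
                    backend="redis://localhost:6379/0")
print(celery_app.control.ping(timeout=2.0))  # one entry per responding worker
```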

+ 1 - 27
api/app/__init__.py

@@ -1,6 +1,3 @@
-import os
-import sqlite3
-
 from flask import Flask
 from flask_cors import CORS
 from . import config
@@ -11,42 +8,19 @@ import logging
 # 创建 SQLAlchemy 全局实例
 db = SQLAlchemy()
 
-
 # 创建并配置 Flask 应用
 def create_app():
     app = Flask(__name__)
     CORS(app)
+    # 进行初始配置,加载配置文件等
     app.config.from_object(config.Config)
     app.logger.setLevel(logging.DEBUG)
-
-    # 图片上传目录
-    UPLOAD_FOLDER = 'uploads'
-    if not os.path.exists(UPLOAD_FOLDER):
-        os.makedirs(UPLOAD_FOLDER)
-    app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
     # 初始化 SQLAlchemy
     db.init_app(app)
 
     # 初始化 Flask-Migrate
     migrate = Migrate(app, db)
 
-    # 初始化数据库表
-    def init_db():
-        conn = sqlite3.connect('software_intro.db')
-        cursor = conn.cursor()
-        cursor.execute('''
-            CREATE TABLE IF NOT EXISTS software_intro (
-                id INTEGER PRIMARY KEY AUTOINCREMENT,
-                title TEXT NOT NULL,
-                intro TEXT
-            )
-        ''')
-        conn.commit()
-        conn.close()
-
-    # 调用初始化数据库
-    init_db()
-
     # 导入路由
     from . import routes
     from . import frontend

+ 7 - 2
api/app/config.py

@@ -3,6 +3,7 @@
 """
 import os
 
+
 class Config:
     DEBUG = True
     DATABASE = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'SoilAcidification.db')
@@ -14,8 +15,12 @@ class Config:
     CELERY_RESULT_BACKEND = 'redis://localhost:6379/0'
     
     # 定义阈值配置
-    DEFAULT_THRESHOLD = 30  # 默认阈值
-    THRESHOLD = DEFAULT_THRESHOLD  # 当前使用的阈值
+    DEFAULT_THRESHOLD_REDUCE = 30  # 降酸模型默认阈值
+    DEFAULT_THRESHOLD_REFLUX = 30  # 反酸模型默认阈值
+    
+    # 当前使用的阈值
+    THRESHOLD_REDUCE = DEFAULT_THRESHOLD_REDUCE  # 降酸模型当前阈值
+    THRESHOLD_REFLUX = DEFAULT_THRESHOLD_REFLUX  # 反酸模型当前阈值
     
     # 定义自动训练默认模型类型配置
     DEFAULT_MODEL_TYPE = 'RandomForest'
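For reference, the new per-type thresholds can be read directly off the `Config` class; a minimal check, assuming the project's dependencies are installed so that the `app` package imports cleanly:

```python
# Minimal check of the split threshold configuration added above.
from app.config import Config

print(Config.THRESHOLD_REDUCE, Config.THRESHOLD_REFLUX)  # both default to 30
```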

+ 116 - 0
api/app/data_cleaner.py

@@ -0,0 +1,116 @@
+"""
+数据清理模块,提供各种数据清理和预处理功能
+"""
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+import logging
+
+logger = logging.getLogger(__name__)
+
+def remove_duplicates(df):
+    """
+    移除数据框中的重复行
+    
+    Args:
+        df: 输入数据框
+        
+    Returns:
+        tuple: (清理后的数据框, 移除的重复项数量)
+    """
+    original_count = len(df)
+    df_clean = df.drop_duplicates()
+    duplicates_removed = original_count - len(df_clean)
+    logger.info(f"移除了 {duplicates_removed} 个重复样本")
+    return df_clean, duplicates_removed
+
+def remove_outliers(df, method='iqr', threshold=1.5):
+    """
+    使用指定方法检测和移除异常值
+    
+    Args:
+        df: 输入数据框
+        method: 异常值检测方法 ('iqr', 'zscore')
+        threshold: 异常值判定阈值
+        
+    Returns:
+        tuple: (清理后的数据框, 移除的异常值数量)
+    """
+    original_count = len(df)
+    
+    if method == 'iqr':
+        Q1 = df.quantile(0.25)
+        Q3 = df.quantile(0.75)
+        IQR = Q3 - Q1
+        outlier_mask = ~((df < (Q1 - threshold * IQR)) | (df > (Q3 + threshold * IQR))).any(axis=1)
+        df_clean = df[outlier_mask]
+    
+    elif method == 'zscore':
+        from scipy import stats
+        z_scores = stats.zscore(df)
+        outlier_mask = ~(np.abs(z_scores) > threshold).any(axis=1)
+        df_clean = df[outlier_mask]
+    
+    outliers_removed = original_count - len(df_clean)
+    logger.info(f"使用 {method} 方法移除了 {outliers_removed} 个异常值")
+    return df_clean, outliers_removed
+
+def clean_dataset(df, target_column=None, remove_dups=False, handle_outliers=False, 
+                 outlier_method='iqr', outlier_threshold=1.5, normalize=False):
+    """
+    综合数据清理函数
+    
+    Args:
+        df: 输入数据框
+        target_column: 目标变量列名或索引
+        remove_dups: 是否移除重复项
+        handle_outliers: 是否处理异常值
+        outlier_method: 异常值检测方法
+        outlier_threshold: 异常值判定阈值
+        normalize: 是否标准化特征
+        
+    Returns:
+        tuple: (特征数据框, 目标变量, 清理统计信息)
+    """
+    stats = {'original_count': len(df)}
+    
+    # 分离特征和目标变量
+    if target_column is not None:
+        if isinstance(target_column, str):
+            X = df.drop(columns=[target_column])
+            y = df[target_column]
+        else:
+            X = df.drop(df.columns[target_column], axis=1)
+            y = df.iloc[:, target_column]
+    else:
+        X = df
+        y = None
+    
+    # 移除重复项
+    if remove_dups:
+        if y is not None:
+            combined = pd.concat([X, y], axis=1)
+            combined, stats['duplicates_removed'] = remove_duplicates(combined)
+            X = combined.iloc[:, :-1] if isinstance(target_column, int) else combined.drop(columns=[target_column])
+            y = combined.iloc[:, -1] if isinstance(target_column, int) else combined[target_column]
+        else:
+            X, stats['duplicates_removed'] = remove_duplicates(X)
+    
+    # 处理异常值
+    if handle_outliers:
+        if y is not None:
+            combined = pd.concat([X, y], axis=1)
+            combined, stats['outliers_removed'] = remove_outliers(combined, method=outlier_method, threshold=outlier_threshold)
+            X = combined.iloc[:, :-1] if isinstance(target_column, int) else combined.drop(columns=[target_column])
+            y = combined.iloc[:, -1] if isinstance(target_column, int) else combined[target_column]
+        else:
+            X, stats['outliers_removed'] = remove_outliers(X, method=outlier_method, threshold=outlier_threshold)
+    
+    # 标准化特征
+    if normalize:
+        scaler = StandardScaler()
+        X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)
+        stats['normalized'] = True
+    
+    stats['final_count'] = len(X)
+    return X, y, stats 
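A hypothetical call against the new module, only to show the shape of the return value; the column names and numbers below are made up and do not come from the project's datasets.

```python
# Illustrative use of clean_dataset (toy data, not the project's schema).
import pandas as pd
from app.data_cleaner import clean_dataset

df = pd.DataFrame({
    "pH":     [5.1, 5.1, 6.0, 4.8, 9.9],       # first two rows are duplicates
    "OM":     [12.0, 12.0, 15.5, 10.2, 99.0],   # last row is an artificial outlier
    "target": [0.3, 0.3, 0.5, 0.2, 3.0],
})

X, y, stats = clean_dataset(df, target_column="target",
                            remove_dups=True, handle_outliers=True,
                            outlier_method="iqr", normalize=True)
print(stats)  # e.g. {'original_count': 5, 'duplicates_removed': 1, 'outliers_removed': ..., 'normalized': True, 'final_count': ...}
```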

+ 8 - 7
api/app/database_models.py

@@ -1,9 +1,8 @@
-import datetime
 from typing import List, Optional
 
-from sqlalchemy import Float, ForeignKey, Integer, TIMESTAMP, Text, text
+from sqlalchemy import Column, Float, ForeignKey, Integer, String, TIMESTAMP, Table, Text, text
 from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
-
+import datetime
 
 class Base(DeclarativeBase):
     pass
@@ -32,10 +31,12 @@ class Models(Base):
     DatasetID: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey('Datasets.Dataset_ID'))
     ModelFilePath: Mapped[Optional[str]] = mapped_column(Text)
     Data_type: Mapped[Optional[str]] = mapped_column(Text)
-    Performance_score: Mapped[Optional[float]] = mapped_column(Text)
-    MAE: Mapped[Optional[float]] = mapped_column(Text)
-    RMSE: Mapped[Optional[float]] = mapped_column(Text)
-    CV_score: Mapped[Optional[float]] = mapped_column(Text)
+    Performance_score: Mapped[Optional[float]] = mapped_column(Float)
+
+    # 新增评分指标字段
+    MAE: Mapped[Optional[float]] = mapped_column(Float)
+    RMSE: Mapped[Optional[float]] = mapped_column(Float)
+    CV_score: Mapped[Optional[float]] = mapped_column(Float)
 
     ModelParameters: Mapped[List['ModelParameters']] = relationship('ModelParameters', back_populates='Models_')
 

+ 140 - 19
api/app/model.py

@@ -4,13 +4,16 @@ import pickle
 import pandas as pd
 from flask_sqlalchemy.session import Session
 from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
-from sklearn.metrics import r2_score
+from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
 from sklearn.model_selection import train_test_split, cross_val_score
 from sqlalchemy import text
 from xgboost import XGBRegressor
+import logging
+import numpy as np
 
 from .database_models import Models, Datasets
 from .config import Config
+from .data_cleaner import clean_dataset
 
 
 # 加载模型
@@ -31,8 +34,72 @@ def predict(session, input_data: pd.DataFrame, model_id):
     predictions = ML_model.predict(input_data)
     return predictions.tolist()
 
+def check_dataset_overlap_with_test(dataset_df, data_type):
+    """
+    检查数据集是否与测试集有重叠
+    
+    Args:
+        dataset_df (DataFrame): 要检查的数据集
+        data_type (str): 数据集类型 ('reflux' 或 'reduce')
+        
+    Returns:
+        tuple: (重叠的行数, 重叠的行索引)
+    """
+    # 加载测试集
+    if data_type == 'reflux':
+        X_test = pd.read_csv('uploads/data/X_test_reflux.csv')
+        Y_test = pd.read_csv('uploads/data/Y_test_reflux.csv')
+    elif data_type == 'reduce':
+        X_test = pd.read_csv('uploads/data/X_test_reduce.csv')
+        Y_test = pd.read_csv('uploads/data/Y_test_reduce.csv')
+    else:
+        raise ValueError(f"不支持的数据类型: {data_type}")
+    
+    # 合并X_test和Y_test
+    if data_type == 'reflux':
+        test_df = pd.concat([X_test, Y_test], axis=1)
+    else:
+        test_df = pd.concat([X_test, Y_test], axis=1)
+    
+    # 确定用于比较的列
+    compare_columns = [col for col in dataset_df.columns if col in test_df.columns]
+    
+    if not compare_columns:
+        return 0, []
+    
+    # 查找重叠的行
+    merged = dataset_df[compare_columns].merge(test_df[compare_columns], how='inner', indicator=True)
+    overlapping_rows = merged[merged['_merge'] == 'both']
+    
+    # 获取重叠行在原始数据集中的索引
+    if not overlapping_rows.empty:
+        # 使用合并后的数据找回原始索引
+        overlap_indices = []
+        for _, row in overlapping_rows.iterrows():
+            # 创建一个布尔掩码,用于在原始数据集中查找匹配的行
+            mask = True
+            for col in compare_columns:
+                mask = mask & (dataset_df[col] == row[col])
+            
+            # 获取匹配行的索引
+            matching_indices = dataset_df[mask].index.tolist()
+            overlap_indices.extend(matching_indices)
+        
+        return len(set(overlap_indices)), list(set(overlap_indices))
+    
+    return 0, []
+
 # 计算模型评分
 def calculate_model_score(model_info):
+    """
+    计算模型评分
+    
+    Args:
+        model_info: 模型信息对象
+        
+    Returns:
+        dict: 包含多种评分指标的字典
+    """
     # 加载模型
     with open(model_info.ModelFilePath, 'rb') as f:
         ML_model = pickle.load(f)
@@ -42,22 +109,55 @@ def calculate_model_score(model_info):
         # 加载保存的 X_test 和 Y_test
         X_test = pd.read_csv('uploads/data/X_test_reflux.csv')
         Y_test = pd.read_csv('uploads/data/Y_test_reflux.csv')
-        print(X_test.columns)  # 在测试时使用的数据的列名
+        
+        # 预测测试集
         y_pred = ML_model.predict(X_test)
+        
+        # 计算各种评分指标
+        r2 = r2_score(Y_test, y_pred)
+        mae = mean_absolute_error(Y_test, y_pred)
+        rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
+        
     elif model_info.Data_type == 'reduce':  # 降酸数据集
         # 加载保存的 X_test 和 Y_test
         X_test = pd.read_csv('uploads/data/X_test_reduce.csv')
         Y_test = pd.read_csv('uploads/data/Y_test_reduce.csv')
-        print(X_test.columns)  # 在测试时使用的数据的列名
+        
+        # 预测测试集
         y_pred = ML_model.predict(X_test)
-
-
-    # 计算 R² 分数
-    r2 = r2_score(Y_test, y_pred)
-    return r2
+        
+        # 计算各种评分指标
+        r2 = r2_score(Y_test, y_pred)
+        mae = mean_absolute_error(Y_test, y_pred)
+        rmse = np.sqrt(mean_squared_error(Y_test, y_pred))
+        
+    else:
+        # 不支持的数据类型
+        return {'r2': 0, 'mae': 0, 'rmse': 0}
+    
+    # 返回所有评分指标(不包括交叉验证得分)
+    return {
+        'r2': float(r2),
+        'mae': float(mae),
+        'rmse': float(rmse)
+    }
 
 
 def train_and_save_model(session, model_type, model_name, model_description, data_type, dataset_id=None):
+    """
+    训练并保存模型
+    
+    Args:
+        session: 数据库会话
+        model_type: 模型类型
+        model_name: 模型名称
+        model_description: 模型描述
+        data_type: 数据类型 ('reflux' 或 'reduce')
+        dataset_id: 数据集ID
+        
+    Returns:
+        tuple: (模型名称, 模型ID, 数据集ID, 交叉验证得分)
+    """
     try:
         if not dataset_id:
             # 创建新的数据集并复制数据,此过程将不立即提交
@@ -79,27 +179,45 @@ def train_and_save_model(session, model_type, model_name, model_description, dat
             if dataset.empty:
                 raise ValueError(f"Dataset {dataset_id} is empty or not found.")
 
+        # 使用数据清理模块
         if data_type == 'reflux':
             X = dataset.iloc[:, 1:-1]
             y = dataset.iloc[:, -1]
+
+            # target_column = -1  # 假设目标变量在最后一列
+            # X, y, clean_stats = clean_dataset(dataset, target_column=target_column)
         elif data_type == 'reduce':
             X = dataset.iloc[:, 2:]
             y = dataset.iloc[:, 1]
-
+            # target_column = 1  # 假设目标变量在第二列
+            # X, y, clean_stats = clean_dataset(dataset, target_column=target_column)
+        
+        # 记录清理统计信息
+        # logging.info(f"数据清理统计: {clean_stats}")
+        
         # 训练模型
         model = train_model_by_type(X, y, model_type)
-
+        
+        # 计算交叉验证得分
+        cv_score = cross_val_score(model, X, y, cv=5).mean()
+        
         # 保存模型到数据库
         model_id = save_model(session, model, model_name, model_type, model_description, dataset_id, data_type)
-
+        
+        # 更新模型的交叉验证得分
+        model_info = session.query(Models).filter(Models.ModelID == model_id).first()
+        if model_info:
+            model_info.CV_score = float(cv_score)
+            session.commit()
+        
         # 所有操作成功后,手动提交事务
         session.commit()
-        return model_name, model_id, dataset_id
+        return model_name, model_id, dataset_id, cv_score
+        
     except Exception as e:
-        # 如果在任何阶段出现异常,回滚事务
         session.rollback()
-        raise e  # 可选择重新抛出异常或处理异常
-
+        logging.error(f"训练和保存模型时发生错误: {str(e)}", exc_info=True)
+        raise
 
 
 def save_current_dataset(session, data_type, commit=True):
@@ -140,6 +258,7 @@ def save_current_dataset(session, data_type, commit=True):
 
     return dataset_id
 
+
 def data_type_table_mapping(data_type):
     """映射数据类型到对应的数据库表名"""
     if data_type == 'reduce':
@@ -152,8 +271,11 @@ def data_type_table_mapping(data_type):
 
 def train_model_by_type(X, y, model_type):
     # 划分数据集
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-
+    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    
+    # 使用全部数据作为训练集
+    X_train, y_train = X, y
+    
     if model_type == 'RandomForest':
         # 随机森林的参数优化
         return train_random_forest(X_train, y_train)
@@ -252,6 +374,7 @@ def train_gradient_boosting(X_train, y_train):
 
     return best_model
 
+
 def save_model(session, model, model_name, model_type, model_description, dataset_id, data_type, commit=False):
     """
     保存模型到数据库,并将模型文件保存到磁盘。
@@ -316,8 +439,6 @@ def save_model(session, model, model_name, model_type, model_description, datase
         raise
 
 
-
-
 if __name__ == '__main__':
     # 反酸模型预测
     # 测试 predict 函数
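The `check_dataset_overlap_with_test` function added above detects uploaded rows that already appear in the held-out test CSVs via an inner merge with `indicator=True`. A toy illustration of that idea (column names and values are placeholders):

```python
# Toy sketch of the overlap check; not tied to the project's real test CSVs.
import pandas as pd

uploaded = pd.DataFrame({"pH": [5.1, 6.0, 4.8], "OM": [12.0, 15.5, 10.2]})
test_set = pd.DataFrame({"pH": [6.0], "OM": [15.5]})

cols = [c for c in uploaded.columns if c in test_set.columns]
merged = uploaded[cols].merge(test_set[cols], how="inner", indicator=True)
overlap = merged[merged["_merge"] == "both"]  # with an inner join every merged row qualifies
print(len(overlap))  # 1 uploaded row also appears in the test set
```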

+ 209 - 132
api/app/routes.py

@@ -1,11 +1,12 @@
-import pickle
 import sqlite3
+from io import BytesIO
+import pickle
 
-from flask import Blueprint, request, jsonify,current_app
-from werkzeug.security import generate_password_hash
+from flask import Blueprint, request, jsonify, current_app, send_file
+from werkzeug.security import check_password_hash, generate_password_hash
+from werkzeug.utils import secure_filename
 
-from sklearn.metrics import r2_score
-from .model import predict, train_and_save_model, calculate_model_score
+from .model import predict, train_and_save_model, calculate_model_score, check_dataset_overlap_with_test
 import pandas as pd
 from . import db  # 从 app 包导入 db 实例
 from sqlalchemy.engine.reflection import Inspector
@@ -16,10 +17,10 @@ from .utils import create_dynamic_table, allowed_file, infer_column_types, renam
     predict_to_Q, Q_to_t_ha, create_kriging
 from sqlalchemy.orm import sessionmaker
 import logging
-from sqlalchemy import text, func, MetaData, Table, select
+from sqlalchemy import text, select, MetaData, Table, func
 from .tasks import train_model_task
 from datetime import datetime
-
+from sklearn.metrics import r2_score
 
 # 配置日志
 logging.basicConfig(level=logging.DEBUG)
@@ -27,9 +28,6 @@ logger = logging.getLogger(__name__)
 # 创建蓝图 (Blueprint),用于分离路由
 bp = Blueprint('routes', __name__)
 
-# 封装数据库连接函数
-def get_db_connection():
-    return sqlite3.connect('software_intro.db')
 
 # 密码加密
 def hash_password(password):
@@ -67,8 +65,11 @@ def check_and_trigger_training(session, dataset_type, dataset_df):
         # 计算新增数据前的记录数
         previous_count = current_count - new_records
         
-        # 设置阈值
-        THRESHOLD = current_app.config['THRESHOLD']
+        # 根据数据集类型选择阈值
+        if dataset_type == 'reduce':
+            THRESHOLD = current_app.config['THRESHOLD_REDUCE']
+        else:  # reflux
+            THRESHOLD = current_app.config['THRESHOLD_REFLUX']
         
         # 计算上一个阈值点(基于新增前的数据量)
         last_threshold = previous_count // THRESHOLD * THRESHOLD
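A worked example of the counting arithmetic above; the numbers are hypothetical and the actual trigger comparison lies outside this hunk.

```python
# Hypothetical numbers for check_and_trigger_training's bookkeeping.
THRESHOLD = 30                                  # e.g. current_app.config['THRESHOLD_REDUCE']
current_count = 65                              # rows in the table after this upload
new_records = 7                                 # rows added by this upload
previous_count = current_count - new_records    # 58
last_threshold = previous_count // THRESHOLD * THRESHOLD
print(last_threshold)                           # 30; the upload pushed the table past the
                                                # next multiple (60), so auto-training can fire
```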
@@ -141,6 +142,49 @@ def upload_dataset():
         dynamic_table_class = create_dynamic_table(new_dataset.Dataset_ID, column_types)
         insert_data_into_dynamic_table(session, dataset_df, dynamic_table_class)
 
+        # 去除上传数据集内部的重复项
+        original_count = len(dataset_df)
+        dataset_df = dataset_df.drop_duplicates()
+        duplicates_in_file = original_count - len(dataset_df)
+
+        # 检查与现有数据的重复
+        duplicates_with_existing = 0
+        if dataset_type in ['reduce', 'reflux']:
+            # 确定表名
+            table_name = 'current_reduce' if dataset_type == 'reduce' else 'current_reflux'
+            
+            # 从表加载现有数据
+            existing_data = pd.read_sql_table(table_name, session.bind)
+            if 'id' in existing_data.columns:
+                existing_data = existing_data.drop('id', axis=1)
+            
+            # 确定用于比较的列
+            compare_columns = [col for col in dataset_df.columns if col in existing_data.columns]
+            
+            # 计算重复行数
+            original_df_len = len(dataset_df)
+            
+            # 使用concat和drop_duplicates找出非重复行
+            all_data = pd.concat([existing_data[compare_columns], dataset_df[compare_columns]])
+            duplicates_mask = all_data.duplicated(keep='first')
+            duplicates_with_existing = sum(duplicates_mask[len(existing_data):])
+            
+            # 保留非重复行
+            dataset_df = dataset_df[~duplicates_mask[len(existing_data):].values]
+            
+            logger.info(f"原始数据: {original_df_len}, 与现有数据重复: {duplicates_with_existing}, 保留: {len(dataset_df)}")
+
+        # 检查与测试集的重叠
+        test_overlap_count, test_overlap_indices = check_dataset_overlap_with_test(dataset_df, dataset_type)
+        
+        # 如果有与测试集重叠的数据,从数据集中移除
+        if test_overlap_count > 0:
+            # 创建一个布尔掩码,标记不在重叠索引中的行
+            mask = ~dataset_df.index.isin(test_overlap_indices)
+            # 应用掩码,只保留不重叠的行
+            dataset_df = dataset_df[mask]
+            logger.warning(f"移除了 {test_overlap_count} 行与测试集重叠的数据")
+
         # 根据 dataset_type 决定插入到哪个已有表
         if dataset_type == 'reduce':
             insert_data_into_existing_table(session, dataset_df, CurrentReduce)
@@ -153,15 +197,30 @@ def upload_dataset():
         training_triggered, task_id = check_and_trigger_training(session, dataset_type, dataset_df)
 
         response_data = {
-            'message': f'Dataset {dataset_name} uploaded successfully!',
+            'message': f'数据集 {dataset_name} 上传成功!',
             'dataset_id': new_dataset.Dataset_ID,
             'filename': unique_filename,
-            'training_triggered': training_triggered
+            'training_triggered': training_triggered,
+            'data_stats': {
+                'original_count': original_count,
+                'duplicates_in_file': duplicates_in_file,
+                'duplicates_with_existing': duplicates_with_existing,
+                'test_overlap_count': test_overlap_count,
+                'final_count': len(dataset_df)
+            }
         }
         
         if training_triggered:
             response_data['task_id'] = task_id
-            response_data['message'] += ' Auto-training has been triggered.'
+            response_data['message'] += ' 自动训练已触发。'
+
+        # 添加去重信息到消息中
+        if duplicates_with_existing > 0:
+            response_data['message'] += f' 已移除 {duplicates_with_existing} 个与现有数据重复的项。'
+            
+        # 添加测试集重叠信息到消息中
+        if test_overlap_count > 0:
+            response_data['message'] += f' 已移除 {test_overlap_count} 个与测试集重叠的项。'
 
         return jsonify(response_data), 201
 
@@ -200,11 +259,22 @@ def train_and_save_model_endpoint():
         if model_id:
             model_info = session.query(Models).filter(Models.ModelID == model_id).first()
             if model_info:
-                score = calculate_model_score(model_info)
+                # 计算多种评分指标
+                score_metrics = calculate_model_score(model_info)
                 # 更新模型评分
-                model_info.Performance_score = score
+                model_info.Performance_score = score_metrics['r2']
+                # 添加新的评分指标到数据库
+                model_info.MAE = score_metrics['mae']
+                model_info.RMSE = score_metrics['rmse']
+                # CV_score 已在 train_and_save_model 中设置,此处不再更新
                 session.commit()
-                result = {'model_id': model_id, 'model_score': score}
+                result = {
+                    'model_id': model_id, 
+                    'model_score': score_metrics['r2'],
+                    'mae': score_metrics['mae'],
+                    'rmse': score_metrics['rmse'],
+                    'cv_score': result[3]
+                }
 
         # 返回成功响应
         return jsonify({
@@ -266,7 +336,7 @@ def predict_route():
         return jsonify({'error': str(e)}), 400
 
 
-# 为指定模型计算评分Performance_score,需要提供model_id
+# 为指定模型计算指标评分,需要提供model_id
 @bp.route('/score-model/<int:model_id>', methods=['POST'])
 def score_model(model_id):
     # 创建 sessionmaker 实例
@@ -278,15 +348,23 @@ def score_model(model_id):
             return jsonify({'error': 'Model not found'}), 404
 
         # 计算模型评分
-        score = calculate_model_score(model_info)
+        score_metrics = calculate_model_score(model_info)
+
+        # 更新模型记录中的评分(不包括交叉验证得分)
+        model_info.Performance_score = score_metrics['r2']
+        model_info.MAE = score_metrics['mae']
+        model_info.RMSE = score_metrics['rmse']
 
-        # 更新模型记录中的评分
-        model_info.Performance_score = score
         session.commit()
 
-        return jsonify({'message': 'Model scored successfully', 'score': score}), 200
+        return jsonify({
+            'message': 'Model scored successfully', 
+            'r2_score': score_metrics['r2'],
+            'mae': score_metrics['mae'],
+            'rmse': score_metrics['rmse'],
+        }), 200
     except Exception as e:
-        logging.error('Failed to process the dataset upload:', exc_info=True)
+        logging.error('Failed to process model scoring:', exc_info=True)
         return jsonify({'error': str(e)}), 400
     finally:
         session.close()
@@ -379,6 +457,7 @@ def get_model(model_id):
                 'Description': model.Description,
                 'Performance_score': float(model.Performance_score) if model.Performance_score else None,
                 'MAE': float(model.MAE) if model.MAE else None,
+                'CV_score': float(model.CV_score) if model.CV_score else None,
                 'RMSE': float(model.RMSE) if model.RMSE else None,
                 'Data_type': model.Data_type
             })
@@ -393,6 +472,46 @@ def get_model(model_id):
         session.close()
 
 
+@bp.route('/models', methods=['GET'])
+def get_all_models():
+    """
+    获取所有模型信息的API接口
+    
+    @return: JSON响应
+    """
+    Session = sessionmaker(bind=db.engine)
+    session = Session()
+    
+    try:
+        models = session.query(Models).all()
+        if models:
+            result = [
+                {
+                    'ModelID': model.ModelID,
+                    'Model_name': model.Model_name,
+                    'Model_type': model.Model_type,
+                    'Created_at': model.Created_at.strftime('%Y-%m-%d %H:%M:%S'),
+                    'Description': model.Description,
+                    'Performance_score': float(model.Performance_score) if model.Performance_score else None,
+                    'MAE': float(model.MAE) if model.MAE else None,
+                    'CV_score': float(model.CV_score) if model.CV_score else None,
+                    'RMSE': float(model.RMSE) if model.RMSE else None,
+                    'Data_type': model.Data_type
+                }
+                for model in models
+            ]
+            return jsonify(result)
+        else:
+            return jsonify({'message': '未找到任何模型'}), 404
+            
+    except Exception as e:
+        logger.error(f'获取所有模型信息失败: {str(e)}')
+        return jsonify({'error': '服务器内部错误', 'message': str(e)}), 500
+        
+    finally:
+        session.close()
+
+
 @bp.route('/model-parameters', methods=['GET'])
 def get_all_model_parameters():
     """
@@ -560,6 +679,7 @@ def delete_model_route(model_id):
     # 调用原始函数
     return delete_model(model_id, delete_dataset=delete_dataset_param)
 
+
 def delete_model(model_id, delete_dataset=False):
     """
     删除指定模型的API接口
@@ -584,16 +704,19 @@ def delete_model(model_id, delete_dataset=False):
         session.commit()
         
         # 2. 删除模型文件
-        model_file = f"rf_model_{model_id}.pkl"
-        model_path = os.path.join(current_app.config['MODEL_SAVE_PATH'], model_file)
-        if os.path.exists(model_path):
-            try:
+        model_path = model.ModelFilePath
+        try:
+            if os.path.exists(model_path):
                 os.remove(model_path)
-            except OSError as e:
+            else:
                 # 如果删除文件失败,回滚数据库操作
-                session.rollback()
-                logger.error(f'删除模型文件失败: {str(e)}')
-                return jsonify({'error': f'删除模型文件失败: {str(e)}'}), 500
+                session.rollback() 
+                logger.warning(f'模型文件不存在: {model_path}')
+        except OSError as e:
+            # 如果删除文件失败,回滚数据库操作
+            session.rollback() 
+            logger.error(f'删除模型文件失败: {str(e)}')
+            return jsonify({'error': f'删除模型文件失败: {str(e)}'}), 500
 
         # 3. 如果需要删除关联的数据集
         if delete_dataset and dataset_id:
@@ -613,7 +736,7 @@ def delete_model(model_id, delete_dataset=False):
 
         response_data = {
             'message': '模型删除成功',
-            'deleted_files': [model_file]
+            'deleted_files': [model_path]
         }
         
         if delete_dataset:
@@ -679,11 +802,13 @@ def update_threshold():
     更新训练阈值的API接口
     
     @body_param threshold: 新的阈值值(整数)
+    @body_param data_type: 数据类型 ('reduce' 或 'reflux')
     @return: JSON响应
     """
     try:
         data = request.get_json()
         new_threshold = data.get('threshold')
+        data_type = data.get('data_type')
         
         # 验证新阈值
         if not isinstance(new_threshold, (int, float)) or new_threshold <= 0:
@@ -691,12 +816,22 @@ def update_threshold():
                 'error': '无效的阈值值,必须为正数'
             }), 400
             
+        # 验证数据类型
+        if data_type not in ['reduce', 'reflux']:
+            return jsonify({
+                'error': '无效的数据类型,必须为 "reduce" 或 "reflux"'
+            }), 400
+            
         # 更新当前应用的阈值配置
-        current_app.config['THRESHOLD'] = int(new_threshold)
+        if data_type == 'reduce':
+            current_app.config['THRESHOLD_REDUCE'] = int(new_threshold)
+        else:  # reflux
+            current_app.config['THRESHOLD_REFLUX'] = int(new_threshold)
         
         return jsonify({
             'success': True,
-            'message': f'阈值已更新为 {new_threshold}',
+            'message': f'{data_type} 阈值已更新为 {new_threshold}',
+            'data_type': data_type,
             'new_threshold': new_threshold
         })
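For illustration, a client-side call against the `update_threshold` endpoint; the route path and port below are assumptions (neither is visible in this hunk), and only the `threshold`/`data_type` body parameters come from the code.

```python
# Hypothetical client call; '/update-threshold' and port 5000 are assumptions.
import requests

resp = requests.post(
    "https://localhost:5000/update-threshold",
    json={"threshold": 50, "data_type": "reduce"},
    verify=False,  # the project serves HTTPS with a local cert (ssl/cert.crt)
)
print(resp.status_code, resp.json())
```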
         
@@ -712,16 +847,32 @@ def get_threshold():
     """
     获取当前训练阈值的API接口
     
+    @query_param data_type: 可选,数据类型 ('reduce' 或 'reflux')
     @return: JSON响应
     """
     try:
-        current_threshold = current_app.config['THRESHOLD']
-        default_threshold = current_app.config['DEFAULT_THRESHOLD']
+        data_type = request.args.get('data_type')
         
-        return jsonify({
-            'current_threshold': current_threshold,
-            'default_threshold': default_threshold
-        })
+        if data_type and data_type not in ['reduce', 'reflux']:
+            return jsonify({
+                'error': '无效的数据类型,必须为 "reduce" 或 "reflux"'
+            }), 400
+            
+        response = {}
+        
+        if data_type == 'reduce' or data_type is None:
+            response['reduce'] = {
+                'current_threshold': current_app.config['THRESHOLD_REDUCE'],
+                'default_threshold': current_app.config['DEFAULT_THRESHOLD_REDUCE']
+            }
+            
+        if data_type == 'reflux' or data_type is None:
+            response['reflux'] = {
+                'current_threshold': current_app.config['THRESHOLD_REFLUX'],
+                'default_threshold': current_app.config['DEFAULT_THRESHOLD_REFLUX']
+            }
+        
+        return jsonify(response)
         
     except Exception as e:
         logging.error(f"获取阈值失败: {str(e)}")
@@ -729,6 +880,7 @@ def get_threshold():
             'error': f'获取阈值失败: {str(e)}'
         }), 500
 
+
 @bp.route('/set-current-dataset/<string:data_type>/<int:dataset_id>', methods=['POST'])
 def set_current_dataset(data_type, dataset_id):
     """
@@ -790,6 +942,7 @@ def set_current_dataset(data_type, dataset_id):
     finally:
         session.close()
 
+
 @bp.route('/get-model-history/<string:data_type>', methods=['GET'])
 def get_model_history(data_type):
     """
@@ -850,6 +1003,7 @@ def get_model_history(data_type):
     finally:
         session.close()
 
+
 @bp.route('/batch-delete-datasets', methods=['POST'])
 def batch_delete_datasets():
     """
@@ -914,6 +1068,7 @@ def batch_delete_datasets():
         logger.error(f'批量删除数据集失败: {str(e)}')
         return jsonify({'error': str(e)}), 500
 
+
 @bp.route('/batch-delete-models', methods=['POST'])
 def batch_delete_models():
     """
@@ -979,6 +1134,7 @@ def batch_delete_models():
         logger.error(f'批量删除模型失败: {str(e)}')
         return jsonify({'error': str(e)}), 500
 
+
 @bp.route('/kriging_interpolation', methods=['POST'])
 def kriging_interpolation():
     try:
@@ -1001,107 +1157,28 @@ def kriging_interpolation():
     except Exception as e:
         return jsonify({"error": str(e)}), 500
 
-# 显示切换模型
-@bp.route('/models', methods=['GET'])
-def get_models():
-    session = None
-    try:
-        # 创建 session
-        Session = sessionmaker(bind=db.engine)
-        session = Session()
-
-        # 查询所有模型
-        models = session.query(Models).all()
-
-        logger.debug(f"Models found: {models}")  # 打印查询的模型数据
-
-        if not models:
-            return jsonify({'message': 'No models found'}), 404
-
-        # 将模型数据转换为字典列表
-        models_list = [
-            {
-                'ModelID': model.ModelID,
-                'ModelName': model.Model_name,
-                'ModelType': model.Model_type,
-                'CreatedAt': model.Created_at.strftime('%Y-%m-%d %H:%M:%S'),
-                'Description': model.Description,
-                'DatasetID': model.DatasetID,
-                'ModelFilePath': model.ModelFilePath,
-                'DataType': model.Data_type,
-                'PerformanceScore': model.Performance_score,
-                'MAE': model.MAE,
-                'RMSE': model.RMSE
-            }
-            for model in models
-        ]
-
-        return jsonify(models_list), 200
-
-    except Exception as e:
-        return jsonify({'error': str(e)}), 400
-    finally:
-        if session:
-            session.close()
-
-
-# 定义提供数据库列表,用于展示表格的 API 接口
-@bp.route('/table', methods=['POST'])
-def get_table():
-    data = request.get_json()
-    table_name = data.get('table')
-    if not table_name:
-        return jsonify({'error': '需要表名'}), 400
-
-    try:
-        # 创建 sessionmaker 实例
-        Session = sessionmaker(bind=db.engine)
-        session = Session()
-
-        # 动态获取表的元数据
-        metadata = MetaData()
-        table = Table(table_name, metadata, autoload_with=db.engine)
-
-        # 从数据库中查询所有记录
-        query = select(table)
-        result = session.execute(query).fetchall()
-
-        # 将结果转换为列表字典形式
-        rows = [dict(zip([column.name for column in table.columns], row)) for row in result]
-
-        # 获取列名
-        headers = [column.name for column in table.columns]
-
-        return jsonify(rows=rows, headers=headers), 200
-
-    except Exception as e:
-        return jsonify({'error': str(e)}), 400
-    finally:
-        # 关闭 session
-        session.close()
-
 
 @bp.route('/model-scatter-data/<int:model_id>', methods=['GET'])
 def get_model_scatter_data(model_id):
     """
     获取指定模型的散点图数据(真实值vs预测值)
-
+    
     @param model_id: 模型ID
     @return: JSON响应,包含散点图数据
     """
     Session = sessionmaker(bind=db.engine)
     session = Session()
-
+    
     try:
         # 查询模型信息
         model = session.query(Models).filter_by(ModelID=model_id).first()
         if not model:
             return jsonify({'error': '未找到指定模型'}), 404
-
+            
         # 加载模型
         with open(model.ModelFilePath, 'rb') as f:
             ML_model = pickle.load(f)
-
+            
         # 根据数据类型加载测试数据
         if model.Data_type == 'reflux':
             X_test = pd.read_csv('uploads/data/X_test_reflux.csv')
@@ -1111,23 +1188,23 @@ def get_model_scatter_data(model_id):
             Y_test = pd.read_csv('uploads/data/Y_test_reduce.csv')
         else:
             return jsonify({'error': '不支持的数据类型'}), 400
-
+            
         # 获取预测值
         y_pred = ML_model.predict(X_test)
-
+        
         # 生成散点图数据
         scatter_data = [
-            [float(true), float(pred)]
+            [float(true), float(pred)] 
             for true, pred in zip(Y_test.iloc[:, 0], y_pred)
         ]
-
+        
         # 计算R²分数
         r2 = r2_score(Y_test, y_pred)
-
+        
         # 获取数据范围,用于绘制对角线
         y_min = min(min(Y_test.iloc[:, 0]), min(y_pred))
         y_max = max(max(Y_test.iloc[:, 0]), max(y_pred))
-
+        
         return jsonify({
             'scatter_data': scatter_data,
             'r2_score': float(r2),
@@ -1135,10 +1212,10 @@ def get_model_scatter_data(model_id):
             'model_name': model.Model_name,
             'model_type': model.Model_type
         }), 200
-
+        
     except Exception as e:
         logger.error(f'获取模型散点图数据失败: {str(e)}', exc_info=True)
         return jsonify({'error': f'获取数据失败: {str(e)}'}), 500
-
+        
     finally:
         session.close()

Binary
api/dump.rdb


+ 126 - 0
api/model_optimize/RF_early.py

@@ -0,0 +1,126 @@
+import pandas as pd
+import numpy as np
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import GridSearchCV, train_test_split
+from sklearn.metrics import mean_squared_error, r2_score
+from model_saver import save_model
+import time
+
+# 早停法实现函数
+def incremental_training(model, X_train, y_train, X_val, y_val, init_trees=10, step=10, max_trees=500, patience=5):
+    """
+    使用早停法增量训练随机森林模型
+    
+    @param {RandomForestRegressor} model - 随机森林模型
+    @param {DataFrame} X_train - 训练特征
+    @param {Series} y_train - 训练标签
+    @param {DataFrame} X_val - 验证特征
+    @param {Series} y_val - 验证标签
+    @param {int} init_trees - 初始树数量
+    @param {int} step - 每次增加的树数量
+    @param {int} max_trees - 最大树数量
+    @param {int} patience - 允许的停滞轮次
+    @return {int} - 最佳树数量
+    """
+    best_score = -np.inf
+    best_n = init_trees
+    no_improve = 0
+    
+    # 初始化模型参数
+    model.set_params(n_estimators=init_trees, warm_start=True)
+    
+    for n in range(init_trees, max_trees + step, step):
+        model.n_estimators = n
+        model.fit(X_train, y_train)
+        current_score = model.score(X_val, y_val)
+        
+        if current_score > best_score:
+            best_score = current_score
+            best_n = n
+            no_improve = 0
+        else:
+            no_improve += 1
+            
+        print(f"当前树数量: {n} | 验证集 R²: {current_score:.4f} | 最佳 R²: {best_score:.4f}")
+        
+        if no_improve >= patience:
+            print(f"在 {n} 棵树时早停")
+            break
+            
+    return best_n
+
+# 数据加载与预处理
+data = pd.read_excel('model_optimize\data\Acidity_reduce_new.xlsx')
+x = data.iloc[:,1:]
+y = data.iloc[:,0]
+x.columns = ['pH', 'OM', 'CL', 'H', 'Al']
+y.name = 'target'
+
+# 数据拆分(新增验证集)
+X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
+X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
+
+start = time.time()
+
+# 阶段一:使用早停法找到合适的树数量(使用默认参数)
+print("阶段一:使用早停法确定树的数量")
+base_model = RandomForestRegressor(
+    random_state=40,
+    warm_start=True  # 启用增量训练
+)
+
+# 执行早停法
+optimal_trees = incremental_training(
+    model=base_model,
+    X_train=X_train,
+    y_train=y_train,
+    X_val=X_val,
+    y_val=y_val,
+    init_trees=2,    # 初始树的数量
+    step=2,         # 每次增加的树数量
+    max_trees=100,   # 最大树数量
+    patience=30       # 允许的停滞轮次
+)
+
+print(f"\n最佳树数量: {optimal_trees}")
+
+# 阶段二:网格搜索调优结构参数
+print("\n阶段二:网格搜索调优其他超参数")
+param_grid = {
+    'max_depth': [None, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20],
+    'min_samples_split': [2, 5],
+    'min_samples_leaf': [1, 2],
+}
+
+grid_search = GridSearchCV(
+    estimator=RandomForestRegressor(n_estimators=optimal_trees, random_state=40),
+    param_grid=param_grid,
+    cv=5,
+    scoring='r2',
+    n_jobs=-1,
+    verbose=2
+)
+grid_search.fit(X_train, y_train)
+
+best_params = grid_search.best_params_
+print("\n阶段二最佳参数:", best_params)
+
+# 最终模型训练
+final_model = RandomForestRegressor(
+    **best_params,
+    n_estimators=optimal_trees,
+    random_state=40
+).fit(pd.concat([X_train, X_val]), pd.concat([y_train, y_val]))
+
+# 模型评估
+test_pred = final_model.predict(X_test)
+print(f"\n最终模型性能:")
+print(f"测试集 R²: {r2_score(y_test, test_pred):.3f}")
+print(f"测试集 RMSE: {np.sqrt(mean_squared_error(y_test, test_pred)):.3f}")
+
+# 模型保存
+save_model(final_model, 'model_optimize\pkl', 'rf_model_')
+
+# 时间统计
+end = time.time()
+print(f"\n总耗时: {end - start:.1f}秒")

+ 18 - 20
api/model_optimize/RF_filt.py

@@ -57,14 +57,14 @@ from sklearn.metrics import roc_auc_score
 # y.name = 'target_ph'
 
 ## 精准降酸数据
-data=pd.read_excel('model_optimize\data\Acidity_reduce.xlsx')
+data=pd.read_excel('model_optimize\data\Acidity_reduce_new.xlsx')
 
 x = data.iloc[:,1:]
 y = data.iloc[:,0]
 # 为 x 赋予列名
 x.columns = [
     'pH',        
-    'OM',              
+    'OM', 
     'CL', 
     'H',
     'Al'
@@ -75,25 +75,24 @@ y.name = 'target'
 Xtrain, Xtest, Ytrain, Ytest = train_test_split(x, y, test_size=0.2, random_state=42)
 
 # 筛选随机种子
-score_5cv_all = []
-for i in range(0, 200, 1):
-    rfc =RandomForestRegressor(random_state=i)
-    score_5cv =cross_val_score(rfc, Xtrain, Ytrain, cv=5).mean()
-    score_5cv_all.append(score_5cv)
-    pass
-score_max_5cv = max(score_5cv_all)
+# score_5cv_all = []
+# for i in range(0, 200, 1):
+#     rfc =RandomForestRegressor(random_state=i)
+#     score_5cv =cross_val_score(rfc, Xtrain, Ytrain, cv=5).mean()
+#     score_5cv_all.append(score_5cv)
+#     pass
+# score_max_5cv = max(score_5cv_all)
 
-random_state_5cv = range(0, 200)[score_5cv_all.index(max(score_5cv_all))] # 5cv最大得分对应的随机种子
-
-print("最大5cv得分:{}".format(score_max_5cv),
-      "random_5cv:{}".format(random_state_5cv))
+# random_state_5cv = range(0, 200)[score_5cv_all.index(max(score_5cv_all))] # 5cv最大得分对应的随机种子
 
+# print("最大5cv得分:{}".format(score_max_5cv),
+#       "random_5cv:{}".format(random_state_5cv))
+random_state_5cv = 40
 
 # 筛选随机树数目
 score_5cv_all = []
-for i in range(1, 400, 1):
-    rfc = RandomForestRegressor(n_estimators=i,
-        random_state=random_state_5cv)
+for i in range(1, 200, 1):
+    rfc = RandomForestRegressor(n_estimators=i, random_state=random_state_5cv)
     score_5cv = cross_val_score(rfc, Xtrain, Ytrain, cv=5).mean()
     score_5cv_all.append(score_5cv)
     pass
@@ -107,7 +106,7 @@ score_test_all = []
 
 # 筛选最大深度
 score_5cv_all = []
-for i in range(1, 300, 1):
+for i in range(1, 200, 1):
     rfc = RandomForestRegressor(n_estimators=n_est_5cv
                                 , random_state=random_state_5cv
                                 , max_depth=i)
@@ -116,8 +115,7 @@ for i in range(1, 300, 1):
     pass
 score_max_5cv = max(score_5cv_all)
 max_depth_5cv = range(1,300)[score_5cv_all.index(score_max_5cv)]    
-print(
-      "最大5cv得分:{}".format(score_max_5cv),
+print("最大5cv得分:{}".format(score_max_5cv),
       "max_depth_5cv:{}".format(max_depth_5cv))      
 
 # 确定参数进行训练
@@ -136,6 +134,6 @@ print("rmse_test",rmse2)
 
 # 保存训练好的模型
 custom_path='model_optimize\pkl'            # 模型保存路径
-prefix='rf_model_'                          # 模型文件名前缀
+prefix='rf_model_raw_'                          # 模型文件名前缀
 save_model(rfc, custom_path, prefix)
 

+ 56 - 0
api/model_optimize/RF_opt.py

@@ -0,0 +1,56 @@
+import pandas as pd
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import GridSearchCV, train_test_split
+from sklearn.metrics import mean_squared_error, r2_score
+from model_saver import save_model
+
+# 统计筛选时间
+import time
+start = time.time()
+
+# 数据加载与划分
+data = pd.read_excel('model_optimize\data\Acidity_reduce_new.xlsx')
+x = data.iloc[:,1:]
+y = data.iloc[:,0]
+x.columns = ['pH', 'OM', 'CL', 'H', 'Al']
+y.name = 'target'
+
+Xtrain, Xtest, Ytrain, Ytest = train_test_split(x, y, test_size=0.2, random_state=42)
+
+# 网格搜索调参
+param_grid = {
+    # 遍历2至99,以及200
+    'n_estimators': [i for i in range(2, 100)] + [200],
+    # 遍历2至10,以及20
+    'max_depth': [None, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20],
+    'min_samples_split': [2, 5],
+    'min_samples_leaf': [1, 2],
+}
+
+grid_search = GridSearchCV(
+    estimator=RandomForestRegressor(random_state=40),
+    param_grid=param_grid,
+    cv=5,
+    # scoring='neg_mean_squared_error',
+    scoring='r2',
+    n_jobs=-1,
+    verbose=2
+)
+grid_search.fit(Xtrain, Ytrain)
+
+# 最优模型评估
+best_model = grid_search.best_estimator_
+train_pred = best_model.predict(Xtrain)
+test_pred = best_model.predict(Xtest)
+
+print(f"Best Params: {grid_search.best_params_}")
+print(f"Train R²: {r2_score(Ytrain, train_pred):.3f}")
+print(f"Test R²: {r2_score(Ytest, test_pred):.3f}")
+print(f"Test RMSE: {mean_squared_error(Ytest, test_pred, squared=False):.3f}")
+
+# 模型保存
+save_model(best_model, 'model_optimize\pkl', 'rf_model_')
+
+# 统计筛选时间
+end = time.time()
+print(f"Time: {end - start:.3f}s")

+ 80 - 0
api/model_optimize/RF_web.py

@@ -0,0 +1,80 @@
+import pandas as pd
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import train_test_split, cross_val_score
+from sklearn.metrics import mean_squared_error, r2_score
+from model_saver import save_model
+
+def train_random_forest(X_train, y_train):
+    """
+    训练随机森林回归模型并优化超参数
+    
+    @param {DataFrame} X_train - 训练特征数据
+    @param {Series} y_train - 训练目标数据
+    @return {tuple} - 返回最佳模型和最佳参数(模型, 最佳树数量, 最佳深度)
+    """
+    best_score = -float('inf')
+    best_n_estimators = None
+    best_max_depth = None
+    random_state = 40
+
+    # 筛选最佳的树的数量
+    for n_estimators in range(1, 20, 1):
+        model = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
+        score = cross_val_score(model, X_train, y_train, cv=5).mean()
+        if score > best_score:
+            best_score = score
+            best_n_estimators = n_estimators
+
+    print(f"Best number of trees: {best_n_estimators}, Score: {best_score}")
+
+    # 在找到的最佳树的数量基础上,筛选最佳的最大深度
+    best_score = 0  # 重置最佳得分,为最大深度优化做准备
+    for max_depth in range(1, 5, 1):
+        model = RandomForestRegressor(n_estimators=best_n_estimators, max_depth=max_depth, random_state=random_state)
+        score = cross_val_score(model, X_train, y_train, cv=5).mean()
+        if score > best_score:
+            best_score = score
+            best_max_depth = max_depth
+
+    print(f"Best max depth: {best_max_depth}, Score: {best_score}")
+
+    # 使用最佳的树的数量和最大深度训练最终模型
+    best_model = RandomForestRegressor(n_estimators=best_n_estimators, max_depth=best_max_depth,
+                                       random_state=random_state)
+
+    # 传入列名进行训练
+    best_model.fit(X_train, y_train)
+    # 指定传入的特征名
+    best_model.feature_names_in_ = X_train.columns
+    return best_model, best_n_estimators, best_max_depth
+
+
+# 统计筛选时间
+import time
+start = time.time()
+
+# 数据加载与划分
+data = pd.read_excel('model_optimize\data\Acidity_reduce_new.xlsx')
+x = data.iloc[:,1:]
+y = data.iloc[:,0]
+x.columns = ['pH', 'OM', 'CL', 'H', 'Al']
+y.name = 'target'
+
+Xtrain, Xtest, Ytrain, Ytest = train_test_split(x, y, test_size=0.2, random_state=42)
+
+# 最优模型评估
+best_model, best_n_estimators, best_max_depth = train_random_forest(Xtrain, Ytrain)
+train_pred = best_model.predict(Xtrain)
+test_pred = best_model.predict(Xtest)
+
+print(f"Best Params: {{'n_estimators': {best_n_estimators}, 'max_depth': {best_max_depth}, 'min_samples_split': 2, 'min_samples_leaf': 1}}")
+print(f"Train R²: {r2_score(Ytrain, train_pred):.3f}")
+print(f"Test R²: {r2_score(Ytest, test_pred):.3f}")
+print(f"Test RMSE: {mean_squared_error(Ytest, test_pred, squared=False):.3f}")
+
+# 模型保存
+save_model(best_model, 'model_optimize\pkl', 'rf_model_web_')
+
+# 统计筛选时间
+end = time.time()
+print(f"Time: {end - start:.3f}s")

Binary
api/model_optimize/data/Acidity_reduce_new - 1.xlsx


Binary
api/model_optimize/data/Acidity_reduce_new - 2.xlsx


Binary
api/model_optimize/data/Acidity_reduce_new - 3.xlsx


+ 313 - 0
api/model_optimize/model_compare.py

@@ -0,0 +1,313 @@
+import os
+import pandas as pd
+import numpy as np
+import joblib
+from sklearn.metrics import mean_squared_error, r2_score
+import matplotlib.pyplot as plt
+import seaborn as sns
+from datetime import datetime
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import train_test_split
+
+def load_models(model_dir):
+    """
+    加载指定目录中的所有模型
+    
+    @param {string} model_dir - 模型存储目录路径
+    @return {dict} - 模型名称和模型对象的字典
+    """
+    models = {}
+    for filename in os.listdir(model_dir):
+        if filename.endswith('.pkl'):
+            model_path = os.path.join(model_dir, filename)
+            model_name = os.path.splitext(filename)[0]
+            try:
+                model = joblib.load(model_path)
+                models[model_name] = model
+                print(f"成功加载模型: {model_name}")
+            except Exception as e:
+                print(f"加载模型 {model_name} 时出错: {str(e)}")
+    return models
+
+def evaluate_models(models, X_test, y_test):
+    """
+    评估所有模型的性能
+    
+    @param {dict} models - 模型名称和模型对象的字典
+    @param {DataFrame} X_test - 测试特征数据
+    @param {Series} y_test - 测试目标数据
+    @return {DataFrame} - 包含各模型评估指标的数据框
+    """
+    results = []
+    
+    for name, model in models.items():
+        try:
+            # 预测
+            y_pred = model.predict(X_test)
+            
+            # 计算评估指标
+            rmse = mean_squared_error(y_test, y_pred, squared=False)
+            r2 = r2_score(y_test, y_pred)
+            
+            # 存储结果
+            results.append({
+                'model_name': name,
+                'rmse': rmse,
+                'r2': r2
+            })
+            
+            print(f"模型 {name} - RMSE: {rmse:.4f}, R²: {r2:.4f}")
+        except Exception as e:
+            print(f"评估模型 {name} 时出错: {str(e)}")
+    
+    # 转换为DataFrame并排序
+    results_df = pd.DataFrame(results)
+    return results_df
+
+def select_best_model(results_df, metric='r2', higher_better=True):
+    """
+    根据指定指标选择最佳模型
+    
+    @param {DataFrame} results_df - 包含各模型评估指标的数据框
+    @param {string} metric - 用于选择的指标名称
+    @param {boolean} higher_better - 指标值是否越高越好
+    @return {string} - 最佳模型名称
+    """
+    if higher_better:
+        best_idx = results_df[metric].idxmax()
+    else:
+        best_idx = results_df[metric].idxmin()
+    
+    best_model = results_df.loc[best_idx, 'model_name']
+    print(f"根据 {metric} 指标,最佳模型是: {best_model}")
+    return best_model
+
+def visualize_results(results_df):
+    """
+    可视化各模型的性能比较
+    
+    @param {DataFrame} results_df - 包含各模型评估指标的数据框
+    """
+    # 设置字体,使用通用字体
+    plt.rcParams['font.sans-serif'] = ['DejaVu Sans', 'Arial']
+    plt.rcParams['axes.unicode_minus'] = False
+    
+    # 创建图形
+    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
+    
+    # RMSE比较图
+    sns.barplot(x='model_name', y='rmse', data=results_df, ax=ax1)
+    ax1.set_title('RMSE Comparison of Models')
+    ax1.set_xlabel('Model Name')
+    ax1.set_ylabel('RMSE (Lower is better)')
+    ax1.tick_params(axis='x', rotation=45)
+    
+    # R²比较图
+    sns.barplot(x='model_name', y='r2', data=results_df, ax=ax2)
+    ax2.set_title('R² Comparison of Models')
+    ax2.set_xlabel('Model Name')
+    ax2.set_ylabel('R² (Higher is better)')
+    ax2.tick_params(axis='x', rotation=45)
+    
+    plt.tight_layout()
+    
+    # 保存图表
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    plt.savefig(f'model_optimize/results/model_comparison_{timestamp}.png', dpi=300)
+    plt.show()
+
+def save_best_model(models, best_model_name, output_dir):
+    """
+    将最佳模型保存到指定目录
+    
+    @param {dict} models - 模型名称和模型对象的字典
+    @param {string} best_model_name - 最佳模型名称
+    @param {string} output_dir - 输出目录
+    """
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    
+    best_model = models[best_model_name]
+    output_path = os.path.join(output_dir, 'best_model.pkl')
+    joblib.dump(best_model, output_path)
+    print(f"最佳模型已保存至: {output_path}")
+
+def extract_and_retrain_model(model_path, X_train, y_train, X_test, y_test):
+    """
+    从现有模型中提取参数,使用这些参数在训练集上重新训练模型,然后在测试集上评估
+    
+    @param {string} model_path - 模型文件路径
+    @param {DataFrame} X_train - 训练特征数据
+    @param {Series} y_train - 训练目标数据
+    @param {DataFrame} X_test - 测试特征数据
+    @param {Series} y_test - 测试目标数据
+    @return {dict} - 包含原始模型和重训练模型评估结果的字典
+    """
+    try:
+        # 加载原始模型
+        original_model = joblib.load(model_path)
+        model_name = os.path.basename(model_path)
+        print(f"成功加载模型: {model_name}")
+        
+        # 提取模型参数
+        params = original_model.get_params()
+        print(f"提取的模型参数: {params}")
+        
+        # 使用原始模型直接在测试集上评估
+        y_pred_original = original_model.predict(X_test)
+        rmse_original = mean_squared_error(y_test, y_pred_original, squared=False)
+        r2_original = r2_score(y_test, y_pred_original)
+        
+        # 使用提取的参数创建新模型并在训练集上训练
+        new_model = RandomForestRegressor(**params)
+        new_model.fit(X_train, y_train)
+        
+        # 在测试集上评估新训练的模型
+        y_pred_new = new_model.predict(X_test)
+        rmse_new = mean_squared_error(y_test, y_pred_new, squared=False)
+        r2_new = r2_score(y_test, y_pred_new)
+        
+        # 返回结果
+        results = {
+            'model_name': model_name,
+            'original': {
+                'rmse': rmse_original,
+                'r2': r2_original
+            },
+            'retrained': {
+                'rmse': rmse_new,
+                'r2': r2_new,
+                'model': new_model
+            },
+            'parameters': params
+        }
+        
+        print(f"原始模型 {model_name} - RMSE: {rmse_original:.4f}, R²: {r2_original:.4f}")
+        print(f"重训练模型 {model_name} - RMSE: {rmse_new:.4f}, R²: {r2_new:.4f}")
+        
+        return results
+    
+    except Exception as e:
+        print(f"处理模型时出错: {str(e)}")
+        return None
+
+def visualize_comparison(original_results, retrained_results):
+    """
+    可视化原始模型和重训练模型的性能比较
+    
+    @param {dict} original_results - 原始模型的评估结果
+    @param {dict} retrained_results - 重训练模型的评估结果
+    """
+    # 设置字体,使用通用字体
+    plt.rcParams['font.sans-serif'] = ['DejaVu Sans', 'Arial']
+    plt.rcParams['axes.unicode_minus'] = False
+    
+    # 创建图形
+    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
+    
+    # 准备数据
+    model_names = ['Original Model', 'Retrained Model']
+    rmse_values = [original_results['rmse'], retrained_results['rmse']]
+    r2_values = [original_results['r2'], retrained_results['r2']]
+    
+    # RMSE比较图
+    ax1.bar(model_names, rmse_values, color=['blue', 'orange'])
+    ax1.set_title('RMSE Comparison')
+    ax1.set_ylabel('RMSE (Lower is better)')
+    
+    # 在柱状图上添加数值标签
+    for i, v in enumerate(rmse_values):
+        ax1.text(i, v + 0.01, f'{v:.4f}', ha='center')
+    
+    # R²比较图
+    ax2.bar(model_names, r2_values, color=['blue', 'orange'])
+    ax2.set_title('R² Comparison')
+    ax2.set_ylabel('R² (Higher is better)')
+    
+    # 在柱状图上添加数值标签
+    for i, v in enumerate(r2_values):
+        ax2.text(i, v + 0.01, f'{v:.4f}', ha='center')
+    
+    plt.tight_layout()
+    
+    # 保存图表
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    plt.savefig(f'model_optimize/results/model_retrain_comparison_{timestamp}.png', dpi=300)
+    plt.show()
+
+if __name__ == "__main__":
+    # 加载数据
+    data = pd.read_excel('model_optimize/data/Acidity_reduce_new.xlsx')
+    X = data.iloc[:, 1:]
+    y = data.iloc[:, 0]
+    X.columns = ['pH', 'OM', 'CL', 'H', 'Al']
+    y.name = 'target'
+    
+    # 划分训练集和测试集
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    
+    # 确保结果目录存在
+    if not os.path.exists('model_optimize/results'):
+        os.makedirs('model_optimize/results')
+    
+    # # 第一部分:评估特定模型并重新训练
+    # specific_model_path = r'pkl\rf_model_0308_1619.pkl'
+    
+    # if os.path.exists(specific_model_path):
+    #     print("\n===== 特定模型参数提取与重训练评估 =====")
+    #     # 提取参数并重新训练
+    #     results = extract_and_retrain_model(specific_model_path, X_train, y_train, X_test, y_test)
+        
+    #     if results:
+    #         # 可视化比较结果
+    #         visualize_comparison(results['original'], results['retrained'])
+            
+    #         # 保存重训练的模型
+    #         retrained_model = results['retrained']['model']
+    #         output_dir = 'model_optimize/retrained_models'
+    #         if not os.path.exists(output_dir):
+    #             os.makedirs(output_dir)
+            
+    #         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    #         output_path = os.path.join(output_dir, f'retrained_model_{timestamp}.pkl')
+    #         joblib.dump(retrained_model, output_path)
+    #         print(f"重训练模型已保存至: {output_path}")
+            
+    #         # 保存模型参数到文本文件
+    #         params_output = os.path.join(output_dir, f'model_parameters_{timestamp}.txt')
+    #         with open(params_output, 'w') as f:
+    #             for param, value in results['parameters'].items():
+    #                 f.write(f"{param}: {value}\n")
+    #         print(f"模型参数已保存至: {params_output}")
+    # else:
+    #     print(f"指定的模型文件不存在: {specific_model_path}")
+    
+    # 第二部分:原有的模型比较代码
+    print("\n===== 所有模型性能比较 =====")
+    # 加载所有模型
+    models = load_models('model_optimize/pkl')
+    
+    if models:
+        # 评估模型
+        results_df = evaluate_models(models, X, y)
+        
+        # 保存评估结果
+        # timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        # results_df.to_csv(f'model_optimize/results/model_comparison_{timestamp}.csv', index=False)
+        
+        # 选择最佳模型 (基于R²)
+        best_model_r2 = select_best_model(results_df, metric='r2', higher_better=True)
+        
+        # 选择最佳模型 (基于RMSE)
+        best_model_rmse = select_best_model(results_df, metric='rmse', higher_better=False)
+        
+        print(f"基于R²的最佳模型: {best_model_r2}")
+        print(f"基于RMSE的最佳模型: {best_model_rmse}")
+        
+        # 可视化结果
+        visualize_results(results_df)
+        
+        # 保存最佳模型 (这里使用R²作为选择标准)
+        # save_best_model(models, best_model_r2, 'model_optimize/best_model')
+    else:
+        print("没有找到可用的模型") 

+ 36 - 0
api/model_optimize/model_parameters.py

@@ -0,0 +1,36 @@
+import pickle
+import os
+
+def load_model(file_path):
+    """
+    加载保存的模型文件。
+    
+    :param file_path: 模型文件的路径
+    :return: 加载的模型
+    """
+    with open(file_path, 'rb') as f:
+        model = pickle.load(f)
+    return model
+
+# 你需要替换为实际的文件路径
+# file_path = 'model_optimize/pkl/rf_model_1207_1530.pkl'
+# file_path = r'pkl\rf_model_0308_1619.pkl'
+file_path = r'pkl\rf_model_0308_1632.pkl'
+
+
+# 加载模型
+loaded_model = load_model(file_path)
+
+# 读取模型的超参数
+n_estimators = loaded_model.n_estimators
+max_depth = loaded_model.max_depth
+min_samples_split = loaded_model.min_samples_split
+min_samples_leaf = loaded_model.min_samples_leaf
+max_features = loaded_model.max_features
+
+# 打印超参数
+print(f"n_estimators: {n_estimators}")
+print(f"max_depth: {max_depth}")
+print(f"min_samples_split: {min_samples_split}")
+print(f"min_samples_leaf: {min_samples_leaf}")
+print(f"max_features: {max_features}")

+ 3 - 11
api/run.py

@@ -1,9 +1,10 @@
-
 from flask import request
+
 from app import create_app
 import os
 # 创建 Flask 应用
 app = create_app()
+
 # 使用 HTTPS
 context = ('ssl/cert.crt', 'ssl/cert.key')
 @app.before_request
@@ -12,16 +13,7 @@ def force_https():
         url = request.url.replace('http://', 'https://', 1)
         from flask import redirect
         return redirect(url, code=301)
-# 设置 secret_key
-app.secret_key = os.urandom(24)  # 使用随机生成的安全密钥
-# 使用 HTTPS
-context = ('ssl/cert.crt', 'ssl/cert.key')
-#@app.before_request
-#def force_https():
-#    if not request.is_secure:
-#        url = request.url.replace('http://', 'https://', 1)
-#       from flask import redirect
-#        return redirect(url, code=301)
+
 
 # 启动服务器
 if __name__ == '__main__':

+ 9 - 0
api/run_celery.bat

@@ -0,0 +1,9 @@
+@echo off
+echo Starting Redis server...
+start "" D:\Redis\Redis-x64-5.0.14.1\redis-server.exe D:\Redis\Redis-x64-5.0.14.1\redis.windows.conf
+
+echo Waiting for Redis to start...
+timeout /t 5 >nul
+
+echo Starting Celery worker...
+watchfiles --filter python "celery -A app.celery_app.celery worker --loglevel=info" .
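
> For Linux/macOS development, an equivalent helper could start both services from Python. This is only a sketch and not part of the committed files; it assumes `redis-server` and `celery` are on the PATH:

```python
# run_celery_dev.py — hypothetical cross-platform counterpart to run_celery.bat
import subprocess
import time

redis_proc = subprocess.Popen(["redis-server"])  # start Redis with its default config
time.sleep(5)  # give Redis a moment to come up, mirroring the batch file's timeout

celery_proc = subprocess.Popen(
    ["celery", "-A", "app.celery_app.celery", "worker", "--loglevel=info"]
)
celery_proc.wait()
```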

+ 37 - 30
shoping/thres/thres.js

@@ -1,33 +1,33 @@
 // pages/threshold/threshold.js
 Page({
   data: {
-    threshold: null,     // 初始化为null
-    newThreshold: null,
-    loading: true       // 添加加载状态
+    refluxThreshold: null,  // 反酸阈值
+    reduceThreshold: null,  // 降酸阈值
+    newReflux: null,        // 新反酸阈值输入
+    newReduce: null,       // 新降酸阈值输入
+    loading: true
   },
 
   onLoad() {
-    this.getThreshold();
+    this.getThresholds();
   },
 
-  // 获取当前阈值(优化版)
-  getThreshold() {
+  // 获取双阈值
+  getThresholds() {
     this.setData({ loading: true });
     wx.request({
       url: 'https://soilgd.com:5000/get-threshold',
+      method: 'GET',
       success: (res) => {
-        if (res.statusCode === 200 && res.data && typeof res.data.current_threshold === 'number') {
+        if (res.statusCode === 200 && res.data) {
           this.setData({
-            threshold: res.data.current_threshold,
+            reduceThreshold: res.data.reduce.current_threshold || null,
+            refluxThreshold: res.data.reflux.current_threshold || null
           });
-        } else {
-          wx.showToast({ title: '数据格式错误', icon: 'error' });
-          console.error('Invalid response:', res);
         }
       },
       fail: (err) => {
-        wx.showToast({ title: '请求失败', icon: 'error' });
-        console.error('API Error:', err);
+        wx.showToast({ title: '获取失败', icon: 'error' });
       },
       complete: () => {
         this.setData({ loading: false });
@@ -35,19 +35,30 @@ Page({
     });
   },
 
-  // 输入框改变事件(添加校验)
+  // 统一输入处理
   onInput(e) {
+    const type = e.currentTarget.dataset.type;
     const value = Number(e.detail.value);
+    const field = `new${type.charAt(0).toUpperCase() + type.slice(1)}`;
+    
     if (!isNaN(value)) {
-      this.setData({
-        newThreshold: value
-      });
+      this.setData({ [field]: value });
     }
   },
 
-  // 提交更新阈值(优化版)
-  updateThreshold() {
-    if (this.data.newThreshold === null || isNaN(this.data.newThreshold)) {
+  // 统一更新处理
+  updateThreshold(e) {
+    const type = e.currentTarget.dataset.type;
+    const field = `new${type.charAt(0).toUpperCase() + type.slice(1)}`;
+    const value = this.data[field];
+
+    if (value === 0) {
+      wx.showToast({ title: '阈值不能为0', icon: 'error' });
+      this.setData({ [field]: null }); // 清空非法输入
+      return;
+    }
+
+    if (value === null || isNaN(value)) {
       wx.showToast({ title: '请输入有效数字', icon: 'error' });
       return;
     }
@@ -56,21 +67,17 @@ Page({
     wx.request({
       url: 'https://soilgd.com:5000/update-threshold',
       method: 'POST',
-      data: { threshold: this.data.newThreshold },
+      data: {
+        data_type: type === 'reduce' ? 'reduce' : 'reflux',
+        threshold: value
+      },
       success: (res) => {
         if (res.statusCode === 200) {
           wx.showToast({ title: '更新成功' });
-          this.setData({ threshold: this.data.newThreshold });
-        } else {
-          wx.showToast({ title: '更新失败', icon: 'error' });
+          this.setData({ [`${type}Threshold`]: value });
         }
       },
-      fail: (err) => {
-        wx.showToast({ title: '网络错误', icon: 'error' });
-      },
-      complete: () => {
-        wx.hideLoading();
-      }
+      complete: () => wx.hideLoading()
     });
   }
 });
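
> The mini-program reads `res.data.reduce.current_threshold` and `res.data.reflux.current_threshold`, so `/get-threshold` presumably returns both thresholds keyed by model type. A hedged Flask-style sketch of that response shape (the hard-coded values and handler body are illustrative assumptions, not taken from this commit):

```python
from flask import Flask, jsonify

app = Flask(__name__)

@app.route('/get-threshold', methods=['GET'])
def get_threshold():
    # Response shape expected by shoping/thres/thres.js; real values would come from config or the database
    return jsonify({
        'reduce': {'current_threshold': 30},
        'reflux': {'current_threshold': 30},
    })
```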

+ 39 - 5
shoping/thres/thres.wxml

@@ -1,10 +1,10 @@
 <!-- pages/threshold/threshold.wxml -->
 <view class="container">
+  <!-- 反酸阈值卡片 -->
   <view class="card">
-    <text class="title">当前阈值</text>
-    
-    <block wx:if="{{threshold !== null}}">
-      <text class="value">{{threshold}}</text>
+    <text class="title">当前反酸模型阈值</text>
+    <block wx:if="{{refluxThreshold !== null}}">
+      <text class="value">{{refluxThreshold}}</text>
     </block>
     <block wx:else>
       <text class="value">--</text>
@@ -13,8 +13,9 @@
     <view class="input-area">
       <input 
         type="number" 
-        value="{{newThreshold}}" 
+        value="{{newReflux}}" 
         bindinput="onInput" 
+        data-type="reflux"
         placeholder="请输入新阈值"
         class="input"
         disabled="{{loading}}"
@@ -22,6 +23,39 @@
       <button 
         type="primary" 
         bindtap="updateThreshold" 
+        data-type="reflux"
+        class="button"
+        disabled="{{loading}}"
+      >
+        {{loading ? '加载中...' : '更新阈值'}}
+      </button>
+    </view>
+  </view>
+
+  <!-- 降酸阈值卡片 -->
+  <view class="card">
+    <text class="title">当前降酸模型阈值</text>
+    <block wx:if="{{reduceThreshold !== null}}">
+      <text class="value">{{reduceThreshold}}</text>
+    </block>
+    <block wx:else>
+      <text class="value">--</text>
+    </block>
+
+    <view class="input-area">
+      <input 
+        type="number" 
+        value="{{newReduce}}" 
+        bindinput="onInput" 
+        data-type="reduce"
+        placeholder="请输入新阈值"
+        class="input"
+        disabled="{{loading}}"
+      />
+      <button 
+        type="primary" 
+        bindtap="updateThreshold" 
+        data-type="reduce"
         class="button"
         disabled="{{loading}}"
       >