@@ -5,106 +5,253 @@ from .model import predict, train_and_save_model
import pandas as pd
from . import db  # import the db instance from the app package
from sqlalchemy.engine.reflection import Inspector
-from .database_models import Model, ModelParameters, Dataset
+from .database_models import Models, ModelParameters, Datasets, CurrentReduce, CurrentReflux
import os
from .utils import create_dynamic_table, allowed_file
from sqlalchemy.orm import sessionmaker
+from sqlalchemy.schema import MetaData, Table
+import logging
+from sqlalchemy import text, select
+
+# Configure logging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+
# Create a Blueprint to keep the routes separate
bp = Blueprint('routes', __name__)
+
+def infer_column_types(df):
+    type_map = {
+        'object': 'str',
+        'int64': 'int',
+        'float64': 'float',
+        'datetime64[ns]': 'datetime'  # map pandas datetime columns
+    }
+    # Map each column to a simple type name, falling back to 'str'
+    return {col: type_map.get(str(df[col].dtype), 'str') for col in df.columns}
+
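+# For example, a frame with dtypes {'name': object, 'age': int64,
+# 'when': datetime64[ns]} maps to {'name': 'str', 'age': 'int', 'when': 'datetime'}.
+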
@bp.route('/upload-dataset', methods=['POST'])
def upload_dataset():
+    session = None  # defined before the try block so except/finally can reference it safely
    try:
-        # Check that the request contains a file
        if 'file' not in request.files:
            return jsonify({'error': 'No file part'}), 400
        file = request.files['file']
+        if file.filename == '' or not allowed_file(file.filename):
+            return jsonify({'error': 'No selected file or invalid file type'}), 400
+
+        dataset_name = request.form.get('dataset_name')
+        dataset_description = request.form.get('dataset_description', 'No description provided')
+        dataset_type = request.form.get('dataset_type')
+        if not dataset_type:
+            return jsonify({'error': 'Dataset type is required'}), 400
+
+        # Create a sessionmaker bound to the app's engine
+        Session = sessionmaker(bind=db.engine)
+        session = Session()
+        new_dataset = Datasets(
+            Dataset_name=dataset_name,
+            Dataset_description=dataset_description,
+            Row_count=0,
+            Status='pending',
+            Dataset_type=dataset_type
+        )
+        session.add(new_dataset)
+        session.commit()  # commit so the database assigns Dataset_ID
+
+        unique_filename = f"dataset_{new_dataset.Dataset_ID}.xlsx"
+        upload_folder = current_app.config['UPLOAD_FOLDER']
+        file_path = os.path.join(upload_folder, unique_filename)
+        file.save(file_path)
+
+        dataset_df = pd.read_excel(file_path)
+        new_dataset.Row_count = len(dataset_df)
+        new_dataset.Status = 'processed'
+        session.commit()
+
+        # Clean the column names
+        dataset_df = clean_column_names(dataset_df)
+        # Rename the DataFrame columns to match the model fields
+        dataset_df = rename_columns_for_model(dataset_df, dataset_type)
+
+        column_types = infer_column_types(dataset_df)
+        dynamic_table_class = create_dynamic_table(new_dataset.Dataset_ID, column_types)
+        insert_data_into_dynamic_table(session, dataset_df, dynamic_table_class)
+
+        # Choose which existing table to insert into based on dataset_type
+        if dataset_type == 'reduce':
+            insert_data_into_existing_table(session, dataset_df, CurrentReduce)
+        elif dataset_type == 'reflux':
+            insert_data_into_existing_table(session, dataset_df, CurrentReflux)
+
+        session.commit()
-        # If no file was given or the filename is empty
-        if file.filename == '':
-            return jsonify({'error': 'No selected file'}), 400
-
-        # Check whether the file type is allowed
-        if file and allowed_file(file.filename):
-            # Get the dataset metadata
-            dataset_name = request.form.get('dataset_name')
-            dataset_description = request.form.get('dataset_description', 'No description provided')
-            dataset_type = request.form.get('dataset_type')  # new field: dataset type
-
-            # Verify that dataset_type was provided
-            if not dataset_type:
-                return jsonify({'error': 'Dataset type is required'}), 400
-
-            # Create the Dataset entity and save it to the database
-            new_dataset = Dataset(
-                DatasetName=dataset_name,
-                DatasetDescription=dataset_description,
-                RowCount=0,  # the row count starts at 0 when the dataset is first created
-                Status='pending',  # the status defaults to 'pending'
-                DatasetType=dataset_type  # save the dataset type
-            )
-            db.session.add(new_dataset)
-            db.session.commit()
-
-            # Get the dataset ID
-            dataset_id = new_dataset.DatasetID
-
-            # Use the database DatasetID as the filename when saving
-            unique_filename = f"dataset_{dataset_id}.xlsx"
-            upload_folder = current_app.config['UPLOAD_FOLDER']
-            file_path = os.path.join(upload_folder, unique_filename)
-
-            # Save the file
-            file.save(file_path)
-
-            # Read the Excel file contents
-            dataset_df = pd.read_excel(file_path)
-
-            # Update the dataset's row count
-            row_count = len(dataset_df)
-            new_dataset.RowCount = row_count
-            new_dataset.Status = 'processed'  # update the status to 'processed'
-            db.session.commit()
-
-            # Dynamically create the data table
-            columns = {}
-            for col in dataset_df.columns:
-                if dataset_df[col].dtype == 'int64':
-                    columns[col] = 'int'
-                elif dataset_df[col].dtype == 'float64':
-                    columns[col] = 'float'
-                else:
-                    columns[col] = 'str'
-
-            # Create the new (dynamic) table
-            dynamic_table_class = create_dynamic_table(dataset_id, columns)
-
-            # Create a new database session
-            Session = sessionmaker(bind=db.engine)
-            session = Session()
-
-            # Insert each row into the dynamically created table
-            for _, row in dataset_df.iterrows():
-                record_data = row.to_dict()
-                # Insert the row into the new table
-                session.execute(dynamic_table_class.__table__.insert(), [record_data])
-
-            session.commit()
-            session.close()
+        return jsonify({
+            'message': f'Dataset {dataset_name} uploaded successfully!',
+            'dataset_id': new_dataset.Dataset_ID,
+            'filename': unique_filename
+        }), 201

-            return jsonify({
-                'message': f'Dataset {dataset_name} uploaded successfully!',
-                'dataset_id': new_dataset.DatasetID,
-                'filename': unique_filename
-            }), 201
+    except Exception as e:
+        if session:
+            session.rollback()
+        logger.error('Failed to process the dataset upload', exc_info=True)
+        return jsonify({'error': str(e)}), 500
+    finally:
+        if session:
+            session.close()
-        else:
-            return jsonify({'error': 'Invalid file type'}), 400
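+
+# A usage sketch for this endpoint (assumes the blueprint is registered at the
+# application root and the dev server listens on localhost:5000):
+#   curl -X POST http://localhost:5000/upload-dataset \
+#        -F "file=@data.xlsx" -F "dataset_name=demo" -F "dataset_type=reduce"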
+
+@bp.route('/train-and-save-model', methods=['POST'])
+def train_and_save_model_endpoint():
+    # Create a session bound to the app's engine
+    Session = sessionmaker(bind=db.engine)
+    session = Session()
+
+    # Parse the parameters from the request
+    data = request.get_json()
+    model_type = data.get('model_type')
+    model_name = data.get('model_name')
+    model_description = data.get('model_description')
+    data_type = data.get('data_type')
+    dataset_id = data.get('dataset_id', None)  # defaults to None when not provided
+
+    try:
+        # Train the model and persist it
+        result = train_and_save_model(session, model_type, model_name, model_description, data_type, dataset_id)
+
+        # Return a success response
+        return jsonify({'message': 'Model trained and saved successfully', 'result': result}), 200
+
+    except Exception as e:
+        session.rollback()
+        logger.error('Failed to train and save the model', exc_info=True)
+        return jsonify({'error': 'Failed to train and save model', 'message': str(e)}), 500
+    finally:
+        session.close()
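+
+# A sketch of a JSON body for this endpoint; the keys mirror the parsing above,
+# the values are purely illustrative:
+#   {"model_type": "random_forest", "model_name": "demo_model",
+#    "model_description": "test run", "data_type": "reduce", "dataset_id": 1}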
+
+
+def clean_column_names(dataframe):
+    # Strip surrounding whitespace and remove non-breaking spaces (\xa0) from the column names
+    dataframe.columns = [col.strip().replace('\xa0', '') for col in dataframe.columns]
+    return dataframe
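+# e.g. a header like ' OM\xa0g/kg ' becomes 'OMg/kg' (outer whitespace stripped,
+# non-breaking spaces removed).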
+
+
+def rename_columns_for_model(dataframe, dataset_type):
+    if dataset_type == 'reduce':
+        rename_map = {
+            '1/b': 'Q_over_b',
+            'pH': 'pH',
+            'OM': 'OM',
+            'CL': 'CL',
+            'H': 'H',
+            'Al': 'Al'
+        }
+    elif dataset_type == 'reflux':
+        rename_map = {
+            'OM g/kg': 'OM',
+            'CL g/kg': 'CL',
+            'CEC cmol/kg': 'CEC',
+            'H+ cmol/kg': 'H_plus',
+            'HN mg/kg': 'HN',
+            'Al3+cmol/kg': 'Al3_plus',
+            'Free alumina g/kg': 'Free_alumina',
+            'Free iron oxides g/kg': 'Free_iron_oxides',
+            'ΔpH': 'Delta_pH'
+        }
+    else:
+        rename_map = {}  # unknown type: leave the columns as-is instead of raising UnboundLocalError
+
+    # Update the column names with rename()
+    dataframe = dataframe.rename(columns=rename_map)
+    return dataframe
+
+
+def insert_data_into_existing_table(session, dataframe, model_class):
+    """Insert data from a DataFrame into an existing SQLAlchemy model table."""
+    for index, row in dataframe.iterrows():
+        record = model_class(**row.to_dict())
+        session.add(record)
+
+def insert_data_into_dynamic_table(session, dataset_df, dynamic_table_class):
+    for _, row in dataset_df.iterrows():
+        record_data = row.to_dict()
+        session.execute(dynamic_table_class.__table__.insert(), [record_data])
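+# Note: this runs one INSERT per row; a single executemany-style call, e.g.
+#   session.execute(dynamic_table_class.__table__.insert(), dataset_df.to_dict('records'))
+# would cut the round trips (an optimization sketch, not the current behavior).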
+
+def insert_data_by_type(session, dataset_df, dataset_type):
+    if dataset_type == 'reduce':
+        for _, row in dataset_df.iterrows():
+            record = CurrentReduce(**row.to_dict())
+            session.add(record)
+    elif dataset_type == 'reflux':
+        for _, row in dataset_df.iterrows():
+            record = CurrentReflux(**row.to_dict())
+            session.add(record)
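+# Note: this helper duplicates insert_data_into_existing_table for the two
+# known types; insert_data_into_existing_table(session, df, CurrentReduce)
+# achieves the same for 'reduce'.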
+
+
+def get_current_data(session, data_type):
+    # Pick the table model that matches the data type
+    if data_type == 'reduce':
+        model = CurrentReduce
+    elif data_type == 'reflux':
+        model = CurrentReflux
+    else:
+        raise ValueError("Invalid data type provided. Choose 'reduce' or 'reflux'.")
+
+    # Query all records and load them into a DataFrame; select(model) yields ORM
+    # rows, so let pandas materialize the query directly
+    dataframe = pd.read_sql(select(model), session.bind)
+    return dataframe
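+# Usage sketch: get_current_data(session, 'reduce') returns every CurrentReduce
+# row as a DataFrame; any other data_type raises ValueError.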
+
+def get_dataset_by_id(session, dataset_id):
+    # Reflect the table's metadata dynamically
+    metadata = MetaData()
+    dataset_table = Table(dataset_id, metadata, autoload_with=session.bind)
+
+    # Query the entire table from the database
+    query = select(dataset_table)
+    result = session.execute(query).fetchall()
+
+    # Check whether any data came back
+    if not result:
+        raise ValueError(f"No data found for dataset {dataset_id}.")
+
+    # Convert the result to a DataFrame
+    dataframe = pd.DataFrame(result, columns=[column.name for column in dataset_table.columns])
+
+    return dataframe
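+# Note: dataset_id is used verbatim as the table name here, and reflection via
+# autoload_with requires that the table already exists in the database.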
+
+
+@bp.route('/delete-dataset/<int:dataset_id>', methods=['DELETE'])
+def delete_dataset(dataset_id):
+    # Create a session bound to the app's engine
+    Session = sessionmaker(bind=db.engine)
+    session = Session()
+    try:
+        # Look up the dataset
+        dataset = session.query(Datasets).filter_by(Dataset_ID=dataset_id).first()
+        if not dataset:
+            return jsonify({'error': 'Dataset not found'}), 404
+
+        # Delete the uploaded file
+        filename = f"dataset_{dataset.Dataset_ID}.xlsx"
+        file_path = os.path.join(current_app.config['UPLOAD_FOLDER'], filename)
+        if os.path.exists(file_path):
+            os.remove(file_path)
+
+        # Drop the dynamic data table
+        table_name = f"dataset_{dataset.Dataset_ID}"
+        session.execute(text(f"DROP TABLE IF EXISTS {table_name}"))
+
+        # Delete the dataset record
+        session.delete(dataset)
+        session.commit()
+
+        return jsonify({'message': 'Dataset deleted successfully'}), 200

    except Exception as e:
+        session.rollback()
+        logger.error(f'Failed to delete dataset {dataset_id}', exc_info=True)
        return jsonify({'error': str(e)}), 500
+    finally:
+        session.close()
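+
+# Usage sketch (assumes the blueprint is registered at the application root):
+#   curl -X DELETE http://localhost:5000/delete-dataset/1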

@bp.route('/tables', methods=['GET'])
@@ -118,7 +265,7 @@ def list_tables():
@bp.route('/models/<int:model_id>', methods=['GET'])
def get_model(model_id):
    try:
-        model = Model.query.filter_by(ModelID=model_id).first()
+        model = Models.query.filter_by(ModelID=model_id).first()
        if model:
            return jsonify({
                'ModelID': model.ModelID,
@@ -136,7 +283,7 @@ def get_model(model_id):
@bp.route('/models', methods=['GET'])
def get_all_models():
    try:
-        models = Model.query.all()  # fetch all models
+        models = Models.query.all()  # fetch all models
        if models:
            result = [
                {
@@ -179,7 +326,7 @@ def get_all_model_parameters():
@bp.route('/models/<int:model_id>/parameters', methods=['GET'])
def get_model_parameters(model_id):
    try:
-        model = Model.query.filter_by(ModelID=model_id).first()
+        model = Models.query.filter_by(ModelID=model_id).first()
        if model:
            # Fetch all parameters for this model
            parameters = [