
Add scripts for importing additional data into the database

yangtaodemon committed 3 weeks ago
commit d3b919b79c

+ 3 - 3
Water/Python_codes/Figure_raster_mapping.py

@@ -38,12 +38,12 @@ meaning of each parameter of the mapping_raster plotting function
 """
 
 def mapping_raster(shp_path, tif_path, color_map_name, title_name, output_path, output_size):
-    gdf = gpd.read_file(shp_path)#--------------------------------------------read the vector boundary
-    with rasterio.open(tif_path) as src:#-------------------------------------read and clip the raster data
+    gdf = gpd.read_file(shp_path)   #--------------------------------------------read the vector boundary
+    with rasterio.open(tif_path) as src:    #-------------------------------------read and clip the raster data
         geoms = [json.loads(gdf.to_json())["features"][0]["geometry"]]
         out_image, out_transform = mask(src, geoms, crop=True)
         out_meta = src.meta.copy()
-    raster = out_image[0].astype('float16')#----------------------------------------------------extract the band and handle invalid values
+    raster = out_image[0].astype('float16') #----------------------------------------------------extract the band and handle invalid values
     nodata = out_meta.get("nodata", None)
     if nodata is not None:
         raster[raster == nodata] = np.nan
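
Note on the float16 cast above: half precision cannot represent common GeoTIFF nodata sentinels such as -3.4e38 (they overflow to -inf), so comparing against out_meta["nodata"] after the cast can silently fail. A minimal sketch of a safer ordering (the helper name to_plot_array is illustrative, not part of the commit): mask at a wider dtype first, then downcast only if needed.

import numpy as np

def to_plot_array(out_image, out_meta):
    band = out_image[0].astype('float32')  # float32 still represents sentinels like -3.4e38
    nodata = out_meta.get("nodata", None)
    if nodata is not None:
        band[band == nodata] = np.nan      # mask before any further downcasting
    return band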

+ 1 - 1
app/models/CropCd_input.py

@@ -35,7 +35,7 @@ class CropCdInputData(Base):
                             comment='gravel fraction (>2 mm), mass percentage [%]')
 
     # Readily available nutrients (note the mapping between DB column names and actual meanings)
-    available_potassium = Column('AvaP', Float, nullable=True,
+    available_potassium = Column('AvaP_IDW', Float, nullable=True,
                                  comment='available potassium (mg/kg) (corresponds to Soil_data.AvaK_IDW)')
     available_phosphorus = Column('AvaK_IDW', Float, nullable=True,
                                   comment='available phosphorus (mg/kg) (corresponds to Soil_data.AvaP_IDW)')
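
The attribute-to-column mapping above is deliberately crossed (available_potassium is stored in the DB column AvaP_IDW, available_phosphorus in AvaK_IDW), so application code should always go through the ORM attributes rather than raw column names. A hedged sketch, assuming a standard SQLAlchemy session:

from sqlalchemy import func
from app.models.CropCd_input import CropCdInputData

def mean_available_potassium(session):
    # compiles to AVG("AvaP_IDW"); the attribute name carries the real meaning
    return session.query(func.avg(CropCdInputData.available_potassium)).scalar()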

+ 1 - 1
app/models/FluxCd_output.py

@@ -2,7 +2,7 @@ from sqlalchemy import Column, Integer, Float, ForeignKeyConstraint
 from app.database import Base
 
 
-class FluxCdOutput(Base):
+class FluxCdOutputData(Base):
     """
     Output data model for the cadmium flux prediction model
 

+ 1 - 1
app/models/__init__.py

@@ -18,4 +18,4 @@ from app.models.atmo_sample import *
 from app.models.atmo_company import *
 from app.models.water_sample import *
 from app.models.agricultural import *
-from app.models.cross_session import *
+from app.models.cross_section import *

+ 0 - 0
app/models/cross_session.py → app/models/cross_section.py


+ 30 - 0
migrations/versions/781090b305e2_edit_atmo_company.py

@@ -0,0 +1,30 @@
+"""edit_atmo_company
+
+Revision ID: 781090b305e2
+Revises: 81b859325353
+Create Date: 2025-07-17 15:44:55.190366
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '781090b305e2'
+down_revision = '81b859325353'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """升级数据库到当前版本"""
+    # ### commands auto generated by Alembic - please adjust! ###
+    pass
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    """将数据库降级到上一版本"""
+    # ### commands auto generated by Alembic - please adjust! ###
+    pass
+    # ### end Alembic commands ###

+ 32 - 0
migrations/versions/81b859325353_edit_cropcd_input.py

@@ -0,0 +1,32 @@
+"""edit_CropCd_input
+
+Revision ID: 81b859325353
+Revises: a2cfbc6c2673
+Create Date: 2025-07-17 15:02:31.471682
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '81b859325353'
+down_revision = 'a2cfbc6c2673'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """Upgrade the database to this revision."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('CropCd_input_data', sa.Column('AvaP_IDW', sa.Float(), nullable=True, comment='available potassium (mg/kg) (corresponds to Soil_data.AvaK_IDW)'))
+    op.drop_column('CropCd_input_data', 'AvaP')
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    """Downgrade the database to the previous revision."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('CropCd_input_data', sa.Column('AvaP', sa.DOUBLE_PRECISION(precision=53), autoincrement=False, nullable=True, comment='available potassium (mg/kg) (corresponds to Soil_data.AvaK_IDW)'))
+    op.drop_column('CropCd_input_data', 'AvaP_IDW')
+    # ### end Alembic commands ###
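
Since this revision only renames AvaP to AvaP_IDW, the add_column/drop_column pair discards any values already stored in AvaP. A data-preserving alternative sketch (using the same op/sa imports as the revision above; assumes PostgreSQL, as the DOUBLE_PRECISION type in downgrade() suggests) would rename the column in place:

def upgrade():
    # keeps existing values instead of dropping and re-adding the column
    op.alter_column(
        'CropCd_input_data', 'AvaP',
        new_column_name='AvaP_IDW',
        existing_type=sa.Float(),
        existing_comment='available potassium (mg/kg) (corresponds to Soil_data.AvaK_IDW)',
    )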

+ 250 - 0
scripts/import_CropCd_input.py

@@ -0,0 +1,250 @@
+"""
+CropCd_input数据导入脚本
+@description: 从Excel文件读取CropCd_input数据并导入到CropCd_input_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+import re
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.CropCd_input import CropCdInputData  # 需创建对应的ORM模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class CropCdInputDataImporter:
+    """
+    CropCd输入数据导入器
+
+    @description: 从Excel文件读取CropCd输入数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='CropCd_input'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'CropCd_input'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需字段列表(设计文档中的原始列名)
+        self.required_columns = [
+            'Farmland_ID', 'Sample_ID',
+            '002_0002IDW', '02_002IDW', '2_02IDW',
+            'AvaP_IDW', 'AvaK_IDW', 'SAvaK_IDW',
+            'TAl_IDW', 'TCa_IDW', 'TFe_IDW', 'TMg_IDW', 'TMn_IDW', 'TN_IDW', 'TS_IDW',
+            'solution'
+        ]
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 清洗列名
+            df.columns = [col for col in df.columns]
+            # 同时也要更新required_columns对应的清洗后列名
+            required_columns_clean = [col for col in self.required_columns]
+
+            # 检查数据类型
+            logger.info("检查数据类型...")
+
+            # 转换数值类型:所有字段都要转
+            for col in required_columns_clean:
+                df[col] = pd.to_numeric(df[col], errors='coerce')
+
+            # 检查是否有无效的数值
+            if df[required_columns_clean].isnull().any().any():
+                logger.warning("发现无效的数值,将跳过这些行")
+                invalid_rows = df[df[required_columns_clean].isnull().any(axis=1)]
+                logger.warning(f"无效行数: {len(invalid_rows)}")
+                df = df.dropna(subset=required_columns_clean)
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查是否有重复数据
+                existing_count = db.query(CropCdInputData).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 1000
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i + batch_size]
+                    batch_objects = []
+
+                    for _, row in batch_df.iterrows():
+                        try:
+                            # 创建CropCdInputData对象
+                            cropcd_input = CropCdInputData(
+                                farmland_id=int(row['Farmland_ID']),
+                                sample_id=int(row['Sample_ID']),
+                                silt_content=float(row['002_0002IDW']),   # 清洗后的列名
+                                sand_content=float(row['02_002IDW']),
+                                gravel_content=float(row['2_02IDW']),
+                                available_potassium=float(row['AvaP_IDW']),
+                                available_phosphorus=float(row['AvaK_IDW']),
+                                slow_available_potassium =float(row['SAvaK_IDW']),
+                                total_aluminum=float(row['TAl_IDW']),
+                                total_calcium=float(row['TCa_IDW']),
+                                total_iron=float(row['TFe_IDW']),
+                                total_magnesium=float(row['TMg_IDW']),
+                                total_manganese=float(row['TMn_IDW']),
+                                total_nitrogen=float(row['TN_IDW']),
+                                total_sulfur=float(row['TS_IDW']),
+                                ln_cd_solution=float(row['solution'])
+                            )
+                            batch_objects.append(cropcd_input)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(CropCdInputData).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始CropCd输入数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("CropCd输入数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 与原始文件相同
+    sheet_name = "CropCd_input"  # 指定对应的sheet名称
+
+    try:
+        # 创建导入器并执行导入
+        importer = CropCdInputDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
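
The importer above logs the existing row count but still inserts unconditionally, so running the script twice duplicates every row. A hedged re-run guard, assuming (farmland_id, sample_id) identifies a record, could filter each batch inside import_data before add_all:

existing_keys = {
    (r.farmland_id, r.sample_id)
    for r in db.query(CropCdInputData.farmland_id, CropCdInputData.sample_id)
}
batch_objects = [
    obj for obj in batch_objects
    if (obj.farmland_id, obj.sample_id) not in existing_keys
]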

+ 225 - 0
scripts/import_CropCd_output.py

@@ -0,0 +1,225 @@
+"""
+CropCd_output数据导入脚本
+@description: 从Excel文件读取CropCd_output数据并导入到CropCd_output_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.CropCd_output import CropCdOutputData  # 需创建对应的ORM模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class CropCdOutputDataImporter:
+    """
+    CropCd输出数据导入器
+
+    @description: 从Excel文件读取CropCd输出数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='CropCd_output'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'CropCd_output'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需字段列表
+        self.required_columns = ['Farmland_ID', 'Sample_ID', 'LnCropCd']
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 检查数据类型
+            logger.info("检查数据类型...")
+
+            # 转换数值类型
+            for col in self.required_columns:
+                df[col] = pd.to_numeric(df[col], errors='coerce')
+
+            # 检查是否有无效的数值
+            if df[self.required_columns].isnull().any().any():
+                logger.warning("发现无效的数值,将跳过这些行")
+                invalid_rows = df[df[self.required_columns].isnull().any(axis=1)]
+                logger.warning(f"无效行数: {len(invalid_rows)}")
+                df = df.dropna(subset=self.required_columns)
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查是否有重复数据
+                existing_count = db.query(CropCdOutputData).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 1000
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i + batch_size]
+                    batch_objects = []
+
+                    for _, row in batch_df.iterrows():
+                        try:
+                            # 创建CropCdOutputData对象
+                            cropcd_output = CropCdOutputData(
+                                farmland_id=int(row['Farmland_ID']),
+                                sample_id=int(row['Sample_ID']),
+                                ln_crop_cd=float(row['LnCropCd'])
+                            )
+                            batch_objects.append(cropcd_output)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(CropCdOutputData).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始CropCd输出数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("CropCd输出数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 与原始文件相同
+    sheet_name = "CropCd_output"  # 指定对应的sheet名称
+
+    try:
+        # 创建导入器并执行导入
+        importer = CropCdOutputDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 261 - 0
scripts/import_EffCd_input.py

@@ -0,0 +1,261 @@
+"""
+EffCd_input数据导入脚本
+@description: 从Excel文件读取EffCd_input数据并导入到EffCd_input_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+import numpy as np  # 用于数值计算
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.EffCd_input import EffCdInputData  # 需创建对应的ORM模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class EffCdInputDataImporter:
+    """
+    EffCd输入数据导入器
+
+    @description: 从Excel文件读取EffCd输入数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='EffCd_input'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'EffCd_input'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需字段列表
+        self.required_columns = [
+            'Farmland_ID', 'Sample_ID',
+            'OC-Fe_0-30', '002_0002IDW', '02_002IDW', '2_02IDW',
+            'AvaK_IDW', 'AvaP_IDW', 'EC_IDW', 'SAvaK_IDW',
+            'TAl_IDW', 'TCa_IDW', 'TCd_IDW', 'TEB_IDW', 'TExH_IDW',
+            'TFe_IDW', 'TK_IDW', 'TMg_IDW', 'TMn_IDW', 'TN_IDW',
+            'TP_IDW', 'TS_IDW', 'Cdsolution'
+        ]
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 检查数据类型
+            logger.info("检查数据类型...")
+
+            # 转换数值类型
+            numeric_columns = [col for col in self.required_columns if col not in ['Farmland_ID', 'Sample_ID']]
+
+            for col in ['Farmland_ID', 'Sample_ID'] + numeric_columns:
+                df[col] = pd.to_numeric(df[col], errors='coerce')
+
+            # 检查是否有无效的数值
+            numeric_check_columns = self.required_columns[2:]  # 跳过ID字段
+            if df[numeric_check_columns].isnull().any().any():
+                logger.warning("发现无效的数值,将跳过这些行")
+                invalid_rows = df[df[numeric_check_columns].isnull().any(axis=1)]
+                logger.warning(f"无效行数: {len(invalid_rows)}")
+                df = df.dropna(subset=numeric_check_columns)
+
+            # 特殊处理OC-Fe列(包含破折号)
+            if 'OC-Fe_0-30' in df.columns:
+                df.rename(columns={'OC-Fe_0-30': 'OC_Fe_0_30'}, inplace=True)
+                self.required_columns[self.required_columns.index('OC-Fe_0-30')] = 'OC_Fe_0_30'
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查是否有重复数据
+                existing_count = db.query(EffCdInputData).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 1000
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i + batch_size]
+                    batch_objects = []
+
+                    for _, row in batch_df.iterrows():
+                        try:
+                            # 创建EffCdInputData对象
+                            effcd_data = EffCdInputData(
+                                farmland_id=int(row['Farmland_ID']),
+                                sample_id=int(row['Sample_ID']),
+                                oc_fe_0_30=float(row['OC_Fe_0_30']),
+                                silt_content =float(row['002_0002IDW']),
+                                sand_content=float(row['02_002IDW']),
+                                gravel_content=float(row['2_02IDW']),
+                                available_potassium=float(row['AvaK_IDW']),
+                                available_phosphorus =float(row['AvaP_IDW']),
+                                electrical_conductivity=float(row['EC_IDW']),
+                                slow_available_potassium=float(row['SAvaK_IDW']),
+                                total_aluminum=float(row['TAl_IDW']),
+                                total_calcium=float(row['TCa_IDW']),
+                                total_cadmium=float(row['TCd_IDW']),
+                                soluble_salts=float(row['TEB_IDW']),
+                                exchangeable_acidity=float(row['TExH_IDW']),
+                                total_iron=float(row['TFe_IDW']),
+                                total_potassium=float(row['TK_IDW']),
+                                total_magnesium=float(row['TMg_IDW']),
+                                total_manganese=float(row['TMn_IDW']),
+                                total_nitrogen=float(row['TN_IDW']),
+                                total_phosphorus=float(row['TP_IDW']),
+                                total_sulfur=float(row['TS_IDW']),
+                                cd_solution=float(row['Cdsolution'])
+                            )
+                            batch_objects.append(effcd_data)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(EffCdInputData).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始EffCd输入数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("EffCd输入数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 与原始文件相同
+    sheet_name = "EffCd_input"  # 指定对应的sheet名称
+
+    try:
+        # 创建导入器并执行导入
+        importer = EffCdInputDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
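
validate_data above special-cases the single dash-containing column 'OC-Fe_0-30'. If more such columns appear, a generic regex sanitizer (a sketch; the helper name sanitize_columns is illustrative) keeps the rename logic in one place and produces the same 'OC_Fe_0_30' result:

import re

def sanitize_columns(df):
    # replace every character that is not alphanumeric or underscore
    return df.rename(columns=lambda c: re.sub(r'[^0-9A-Za-z_]', '_', c))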

+ 225 - 0
scripts/import_EffCd_output.py

@@ -0,0 +1,225 @@
+"""
+EffCd_output数据导入脚本
+@description: 从Excel文件读取EffCd_output数据并导入到EffCd_output_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.EffCd_output import EffCdOutputData  # 需创建对应的ORM模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class EffCdOutputDataImporter:
+    """
+    EffCd输出数据导入器
+
+    @description: 从Excel文件读取EffCd输出数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='EffCd_output'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'EffCd_output'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需字段列表
+        self.required_columns = ['Farmland_ID', 'Sample_ID', 'LnEffCd']
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 检查数据类型
+            logger.info("检查数据类型...")
+
+            # 转换数值类型
+            for col in self.required_columns:
+                df[col] = pd.to_numeric(df[col], errors='coerce')
+
+            # 检查是否有无效的数值
+            if df[self.required_columns].isnull().any().any():
+                logger.warning("发现无效的数值,将跳过这些行")
+                invalid_rows = df[df[self.required_columns].isnull().any(axis=1)]
+                logger.warning(f"无效行数: {len(invalid_rows)}")
+                df = df.dropna(subset=self.required_columns)
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查是否有重复数据
+                existing_count = db.query(EffCdOutputData).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 1000
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i + batch_size]
+                    batch_objects = []
+
+                    for _, row in batch_df.iterrows():
+                        try:
+                            # 创建EffCdOutputData对象
+                            effcd_output = EffCdOutputData(
+                                farmland_id=int(row['Farmland_ID']),
+                                sample_id=int(row['Sample_ID']),
+                                ln_eff_cd=float(row['LnEffCd'])
+                            )
+                            batch_objects.append(effcd_output)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(EffCdOutputData).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始EffCd输出数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("EffCd输出数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 与原始文件相同
+    sheet_name = "EffCd_output"  # 指定对应的sheet名称
+
+    try:
+        # 创建导入器并执行导入
+        importer = EffCdOutputDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 261 - 0
scripts/import_FluxCd_input.py

@@ -0,0 +1,261 @@
+"""
+FluxCd_input数据导入脚本
+@description: 从Excel文件读取FluxCd_input数据并导入到fluxcd_input_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.FluxCd_input import FluxCdInputData  # 需创建对应的ORM模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class FluxCdInputDataImporter:
+    """
+    FluxCd输入数据导入器
+
+    @description: 从Excel文件读取FluxCd输入数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='FluxCd_input'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'FluxCd_input'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需字段列表(设计文档中的原始列名)
+        self.required_columns = [
+            'Farmland_ID', 'Sample_ID', 'Initial_Cd',
+            'DQCJ_Cd', 'GGS_Cd', 'NCP_Cd',
+            'DX_Cd', 'DB_Cd', 'ZL_Cd', 'JG_Cd'
+        ]
+
+        # 默认值设置(针对允许空的字段)
+        self.default_values = {
+            'DX_Cd': 0.023,
+            'DB_Cd': 0.368
+        }
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 将列名转换为小写(带下划线)
+            df.columns = [col.lower() for col in df.columns]
+            required_columns_lower = [col.lower() for col in self.required_columns]
+            default_values_lower = {k.lower(): v for k, v in self.default_values.items()}
+
+            # 检查数据类型
+            logger.info("检查数据类型...")
+
+            # 转换数值类型
+            for col in required_columns_lower:
+                df[col] = pd.to_numeric(df[col], errors='coerce')
+
+            # 处理空值:对于有默认值的列,用默认值填充;其他列必须非空
+            # 对于允许空且有默认值的列
+            for col in ['dx_cd', 'db_cd']:
+                if col in required_columns_lower:
+                    # 用默认值填充空值
+                    default_val = default_values_lower.get(col, None)
+                    if default_val is not None:
+                        df[col] = df[col].fillna(default_val)
+                    # 同时,也要确保其他非空字段没有空值(除了这两个字段,其他字段不能为空)
+
+            # 其他字段如果有空值,则删除行
+            # 先找出没有默认值的必需字段
+            non_default_columns = [col for col in required_columns_lower if col not in ['dx_cd', 'db_cd']]
+            if df[non_default_columns].isnull().any().any():
+                logger.warning("发现非默认值列有无效的数值,将跳过这些行")
+                # 找出这些行
+                invalid_rows = df[df[non_default_columns].isnull().any(axis=1)]
+                logger.warning(f"无效行数: {len(invalid_rows)}")
+                # 删除这些行
+                df = df.dropna(subset=non_default_columns)
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查是否有重复数据
+                existing_count = db.query(FluxCdInputData).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 1000
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i + batch_size]
+                    batch_objects = []
+
+                    for _, row in batch_df.iterrows():
+                        try:
+                            # 创建FluxCdInputData对象
+                            fluxcd_input = FluxCdInputData(
+                                farmland_id=int(row['farmland_id']),
+                                sample_id=int(row['sample_id']),
+                                initial_cd=float(row['initial_cd']),
+                                atmospheric_deposition=float(row['dqcj_cd']),
+                                irrigation_input=float(row['ggs_cd']),
+                                agro_chemicals_input=float(row['ncp_cd']),
+                                groundwater_leaching=float(row['dx_cd']),
+                                surface_runoff=float(row['db_cd']),
+                                grain_removal=float(row['zl_cd']),
+                                straw_removal=float(row['jg_cd'])
+                            )
+                            batch_objects.append(fluxcd_input)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(FluxCdInputData).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始FluxCd输入数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("FluxCd输入数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 与原始文件相同
+    sheet_name = "FluxCd_input"  # 指定对应的sheet名称
+
+    try:
+        # 创建导入器并执行导入
+        importer = FluxCdInputDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
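
The defaults above (DX_Cd=0.023, DB_Cd=0.368) are filled in silently. A small visibility tweak, sketched under the same lower-cased column names validate_data produces, logs how many cells each default actually touched:

for col, default_val in {'dx_cd': 0.023, 'db_cd': 0.368}.items():
    n_filled = int(df[col].isna().sum())
    if n_filled:
        logger.info(f"{col}: filling {n_filled} empty cells with default {default_val}")
    df[col] = df[col].fillna(default_val)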

+ 252 - 0
scripts/import_FluxCd_output.py

@@ -0,0 +1,252 @@
+"""
+FluxCd_output数据导入脚本
+@description: 从Excel文件读取FluxCd_output数据并导入到fluxcd_output_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.FluxCd_output import FluxCdOutputData  # 需创建对应的ORM模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class FluxCdOutputDataImporter:
+    """
+    FluxCd输出数据导入器
+
+    @description: 从Excel文件读取FluxCd输出数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='FluxCd_output'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'FluxCd_output'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需字段列表(设计文档中的原始列名)
+        self.required_columns = [
+            'Farmland_ID', 'Sample_ID', 'In_Cd',
+            'Out_Cd', 'Net_Cd', 'End_Cd'
+        ]
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 将列名转换为小写(带下划线)
+            df.columns = [col.lower() for col in df.columns]
+            required_columns_lower = [col.lower() for col in self.required_columns]
+
+            # 检查数据类型
+            logger.info("检查数据类型...")
+
+            # 转换数值类型
+            for col in required_columns_lower:
+                df[col] = pd.to_numeric(df[col], errors='coerce')
+
+            # 处理空值 - 所有字段必须非空
+            if df.isnull().any().any():
+                logger.warning("发现空值,将删除包含空值的行")
+                # 找出空值行
+                invalid_rows = df[df.isnull().any(axis=1)]
+                logger.warning(f"无效行数: {len(invalid_rows)}")
+                # 删除空值行
+                df = df.dropna()
+
+            # 验证逻辑关系:Net_Cd = In_Cd - Out_Cd
+            tolerance = 1e-6
+            net_cd_calculated = df['in_cd'] - df['out_cd']
+            mismatches = abs(df['net_cd'] - net_cd_calculated) > tolerance
+
+            if mismatches.any():
+                mismatched_indices = mismatches[mismatches].index.tolist()
+                logger.warning(f"发现 {len(mismatched_indices)} 行 Net_Cd 值与计算值不一致:")
+                for i in mismatched_indices[:5]:  # 只显示前5个示例
+                    logger.warning(f"行 {i}: Net_Cd={df.at[i, 'net_cd']}, 计算值={net_cd_calculated[i]}")
+
+                # 用计算值覆盖原始值
+                df['net_cd'] = net_cd_calculated
+                logger.info("已自动修正 Net_Cd 值为 In_Cd - Out_Cd")
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查是否有重复数据
+                existing_count = db.query(FluxCdOutputData).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 1000
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i + batch_size]
+                    batch_objects = []
+
+                    for _, row in batch_df.iterrows():
+                        try:
+                            # 创建FluxCdOutputData对象
+                            fluxcd_output = FluxCdOutputData(
+                                farmland_id=int(row['farmland_id']),
+                                sample_id=int(row['sample_id']),
+                                in_cd=float(row['in_cd']),
+                                out_cd=float(row['out_cd']),
+                                net_cd=float(row['net_cd']),
+                                end_cd=float(row['end_cd'])
+                            )
+                            batch_objects.append(fluxcd_output)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(FluxCdOutputData).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始FluxCd输出数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("FluxCd输出数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 与原始文件相同
+    sheet_name = "FluxCd_output"  # 指定对应的sheet名称
+
+    try:
+        # 创建导入器并执行导入
+        importer = FluxCdOutputDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
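
The Net_Cd correction in validate_data is easiest to see with a worked row: for In_Cd = 0.50 and Out_Cd = 0.12 the identity requires Net_Cd = 0.38, so a recorded 0.40 exceeds the 1e-6 tolerance and gets overwritten. A minimal, self-contained check with illustrative values:

import pandas as pd

df = pd.DataFrame({'in_cd': [0.50], 'out_cd': [0.12], 'net_cd': [0.40]})
computed = df['in_cd'] - df['out_cd']                 # 0.38
mismatch = (df['net_cd'] - computed).abs() > 1e-6     # True for this row
df.loc[mismatch, 'net_cd'] = computed                 # 0.40 -> 0.38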

+ 370 - 0
scripts/import_MSM_input.py

@@ -0,0 +1,370 @@
+"""
+络合模型输入数据导入脚本
+@description: 从Excel文件读取MSM_input数据并导入到MSM_input_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import numpy as np
+import logging
+from datetime import datetime
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.MSM_input import MSMInputData  # 确保已创建MSMInputData模型
+from sqlalchemy.orm import sessionmaker
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class MSMInputDataImporter:
+    """
+    络合模型输入数据导入器
+
+    @description: 从Excel文件读取MSM输入数据并导入到MSM_input_data表
+    """
+
+    def __init__(self, excel_path, sheet_name='MSM_input'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'MSM_input'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需列
+        self.required_columns = [
+            'Farmland_ID',
+            'Sample_ID',
+            'Var:',
+            'CO2[g].tot',
+            'watervolume',
+            'SL',
+            'pH',
+            'Ca+2.tot',
+            'Mg+2.tot',
+            'K+.tot',
+            'Na+.tot',
+            'Cl-.tot',
+            'Cd.tot',
+            'HFO_kgkg',
+            'CLAY_kgkg',
+            'HA_kgkg',
+            'FA_kgkg'
+        ]
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 检查Farmland_ID和Sample_ID是否重复
+            duplicates = df.duplicated(subset=['Farmland_ID', 'Sample_ID', 'Var:'])
+            if duplicates.any():
+                dup_rows = df[duplicates]
+                logger.warning(f"发现 {len(dup_rows)} 条重复记录(基于Farmland_ID和Sample_ID和Var:)")
+                logger.info("重复记录示例:\n" + dup_rows.head().to_string())
+                # 删除重复行,保留第一个出现的
+                df = df.drop_duplicates(subset=['Farmland_ID', 'Sample_ID', 'Var:'], keep='first')
+                logger.info(f"删除重复记录后剩余 {len(df)} 行数据")
+
+            # 处理字符串列
+            string_columns = ['Var:']
+            for col in string_columns:
+                if col in df.columns:
+                    df[col] = df[col].astype(str).fillna('')
+
+            # 处理数值列
+            numeric_columns = [
+                'CO2[g].tot', 'watervolume', 'SL', 'pH',
+                'Ca+2.tot', 'Mg+2.tot', 'K+.tot', 'Na+.tot',
+                'Cl-.tot', 'Cd.tot', 'HFO_kgkg', 'CLAY_kgkg',
+                'HA_kgkg', 'FA_kgkg'
+            ]
+
+            for col in numeric_columns:
+                if col in df.columns:
+                    # 尝试转换为数值类型
+                    df[col] = pd.to_numeric(df[col], errors='coerce')
+
+                    # 检查空值
+                    null_count = df[col].isnull().sum()
+                    if null_count > 0:
+                        logger.warning(f"列 {col} 中有 {null_count} 个空值或无效值")
+
+                        # 对于关键计算列,如果没有数据可能需要设置默认值或跳过
+                        if col in ['pH', 'Cd.tot']:
+                            logger.error(f"关键列 {col} 存在空值,需要处理")
+                            invalid_rows = df[df[col].isnull()]
+                            logger.info("问题行:\n" + invalid_rows.head().to_string())
+
+                            # 标记为无效
+                            df[f'{col}_invalid'] = df[col].isnull()
+
+            # 处理Farmland_ID和Sample_ID
+            for col in ['Farmland_ID', 'Sample_ID']:
+                if col in df.columns:
+                    # 尝试转换为整数
+                    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(-1)
+
+                    # 检查无效值
+                    invalid_ids = df[df[col] < 0]
+                    if not invalid_ids.empty:
+                        logger.warning(f"列 {col} 中有 {len(invalid_ids)} 条无效值")
+                        logger.info("问题行:\n" + invalid_ids.head().to_string())
+
+                        # 标记为无效
+                        df[f'{col}_invalid'] = df[col] < 0
+
+            # 验证计算列的逻辑一致性
+            if 'Cl-.tot' in df.columns and all(
+                    [c in df.columns for c in ['Ca+2.tot', 'Mg+2.tot', 'K+.tot', 'Na+.tot']]):
+                # 计算理论Cl-总量
+                theoretical_cl = (df['Ca+2.tot'] + df['Mg+2.tot']) * 2 + df['K+.tot'] + df['Na+.tot']
+
+                # 检查与提供的Cl-总量的差异
+                cl_diff = abs(df['Cl-.tot'] - theoretical_cl) / theoretical_cl
+                outlier_mask = cl_diff > 0.1  # 超过10%差异的视为异常
+
+                if outlier_mask.sum() > 0:
+                    logger.warning(f"发现 {outlier_mask.sum()} 条记录的Cl-.tot值与计算值存在显著差异(>10%)")
+                    df['Cl-.tot_consistency'] = ~outlier_mask
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def create_msm_input_object(self, row):
+        """
+        创建MSM输入数据对象
+
+        @param {pd.Series} row - 数据行
+        @returns: MSMInputData 对象
+        """
+        try:
+            # 处理无效数据
+            invalid_fields = []
+            for col in self.required_columns:
+                if f'{col}_invalid' in row and row[f'{col}_invalid']:
+                    invalid_fields.append(col)
+
+            if invalid_fields:
+                logger.warning(
+                    f"跳过无效行: Farmland_ID={row['Farmland_ID']}, Sample_ID={row['Sample_ID']}, 无效字段: {', '.join(invalid_fields)}")
+                return None
+
+            # 创建对象
+            return MSMInputData(
+                farmland_id=int(row['Farmland_ID']),
+                sample_id=int(row['Sample_ID']),
+                var=row['Var:'],
+                co2_tot=row['CO2[g].tot'],
+                water_volume=row['watervolume'],
+                sl_ratio=row['SL'],
+                ph_value=row['pH'],
+                ca_tot=row['Ca+2.tot'],
+                mg_tot=row['Mg+2.tot'],
+                k_tot=row['K+.tot'],
+                na_tot=row['Na+.tot'],
+                cl_tot=row['Cl-.tot'],
+                cd_tot=row['Cd.tot'],
+                hfo_kgkg=row['HFO_kgkg'],
+                clay_kgkg=row['CLAY_kgkg'],
+                ha_kgkg=row['HA_kgkg'],
+                fa_kgkg=row['FA_kgkg']
+            )
+        except KeyError as e:
+            logger.warning(f"创建对象时缺少必要字段: {str(e)}")
+            return None
+        except Exception as e:
+            logger.warning(f"创建MSMInputData对象失败: {str(e)}")
+            return None
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查现有数据量
+                existing_count = db.query(MSMInputData).count()
+                logger.info(f"数据库中现有MSM输入数据记录: {existing_count} 条")
+
+                # 批量创建对象并导入
+                total_rows = len(df)
+                imported_count = 0
+                skipped_count = 0
+                invalid_count = 0
+                batch_size = 100
+                objects_to_insert = []
+
+                # 准备批量处理
+                for i, row in df.iterrows():
+                    # 提前跳过验证阶段标记为无效的数据
+                    invalid = False
+                    for col in self.required_columns:
+                        if f'{col}_invalid' in row and row[f'{col}_invalid']:
+                            invalid = True
+                            break
+
+                    if invalid:
+                        invalid_count += 1
+                        continue
+
+                    try:
+                        obj = self.create_msm_input_object(row)
+                        if not obj:
+                            skipped_count += 1
+                            continue
+
+                        objects_to_insert.append(obj)
+                        imported_count += 1
+
+                        # 每100条提交一次
+                        if len(objects_to_insert) >= batch_size:
+                            db.add_all(objects_to_insert)
+                            db.commit()
+                            logger.info(f"已批量导入 {imported_count}/{total_rows} 条数据")
+                            objects_to_insert = []
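+                            # 说明:分批提交可限制单个事务体积;batch_size=100 为脚本内的经验值,可按需调整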
+
+                    except Exception as e:
+                        logger.warning(f"处理行 {i} 时出错: {str(e)}")
+                        skipped_count += 1
+                        db.rollback()
+
+                # 提交剩余数据
+                if objects_to_insert:
+                    db.add_all(objects_to_insert)
+                    db.commit()
+
+                # 更新统计信息
+                new_count = db.query(MSMInputData).count()
+                added_count = new_count - existing_count
+
+                logger.info(f"MSM输入数据导入完成!")
+                logger.info(f"尝试导入行数: {total_rows}")
+                logger.info(f"成功导入: {imported_count} 条")
+                logger.info(f"跳过无效数据: {invalid_count} 条")
+                logger.info(f"处理失败: {skipped_count} 条")
+                logger.info(f"数据库中新增加: {added_count} 条记录")
+                logger.info(f"数据库总记录: {new_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始MSM输入数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("MSM输入数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 根据实际路径修改
+    sheet_name = "MSM_input"  # 确保Excel中有这个sheet
+
+    try:
+        # 创建导入器并执行导入
+        importer = MSMInputDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 314 - 0
scripts/import_MSM_output.py

@@ -0,0 +1,314 @@
+"""
+络合模型输出数据导入脚本
+@description: 从Excel文件读取MSM_output数据并导入到MSM_output_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from datetime import datetime
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
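+# 说明:将项目根目录加入模块搜索路径,使脚本可直接运行并导入 app 包
+# (假设本脚本位于项目根目录下的 scripts/ 目录中)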
+
+from app.database import engine, SessionLocal
+from app.models.MSM_output import MSMOutputData  # 确保已创建MSMOutputData模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class MSMOutputDataImporter:
+    """
+    络合模型输出数据导入器
+
+    @description: 从Excel文件读取MSM输出数据并导入到MSM_output_data表
+    """
+
+    def __init__(self, excel_path, sheet_name='MSM_output'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'MSM_output'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需列
+        self.required_columns = [
+            'Farmland_ID',
+            'Sample_ID',
+            'Var:',
+            'Cd.solution'
+        ]
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 检查Farmland_ID、Sample_ID和Var:是否重复
+            duplicates = df.duplicated(subset=['Farmland_ID', 'Sample_ID', 'Var:'])
+            if duplicates.any():
+                dup_rows = df[duplicates]
+                logger.warning(f"发现 {len(dup_rows)} 条重复记录(基于Farmland_ID, Sample_ID和Var:)")
+                logger.info("重复记录示例:\n" + dup_rows.head().to_string())
+                # 删除重复行,保留第一个出现的
+                df = df.drop_duplicates(subset=['Farmland_ID', 'Sample_ID', 'Var:'], keep='first')
+                logger.info(f"删除重复记录后剩余 {len(df)} 行数据")
+
+            # 处理字符串列
+            string_columns = ['Var:']
+            for col in string_columns:
+                if col in df.columns:
+                    df[col] = df[col].fillna('').astype(str)  # 先填充缺失值再转字符串,避免产生'nan'字符串
+
+            # 处理数值列
+            numeric_columns = ['Cd.solution']
+            for col in numeric_columns:
+                if col in df.columns:
+                    # 尝试转换为数值类型
+                    df[col] = pd.to_numeric(df[col], errors='coerce')
+
+                    # 检查空值
+                    null_count = df[col].isnull().sum()
+                    if null_count > 0:
+                        logger.warning(f"列 {col} 中有 {null_count} 个空值或无效值")
+                        # 标记为无效
+                        df[f'{col}_invalid'] = df[col].isnull()
+
+            # 处理Farmland_ID和Sample_ID
+            for col in ['Farmland_ID', 'Sample_ID']:
+                if col in df.columns:
+                    # 尝试转换为整数
+                    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(-1)
+
+                    # 检查无效值
+                    invalid_ids = df[df[col] < 0]
+                    if not invalid_ids.empty:
+                        logger.warning(f"列 {col} 中有 {len(invalid_ids)} 条无效值")
+                        logger.info("问题行:\n" + invalid_ids.head().to_string())
+                        # 标记为无效
+                        df[f'{col}_invalid'] = df[col] < 0
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def create_msm_output_object(self, row):
+        """
+        创建MSM输出数据对象
+
+        @param {pd.Series} row - 数据行
+        @returns: MSMOutputData 对象
+        """
+        try:
+            # 处理无效数据
+            invalid_fields = []
+            for col in self.required_columns:
+                if f'{col}_invalid' in row and row[f'{col}_invalid']:
+                    invalid_fields.append(col)
+
+            if invalid_fields:
+                logger.warning(
+                    f"跳过无效行: Farmland_ID={row['Farmland_ID']}, Sample_ID={row['Sample_ID']}, 无效字段: {', '.join(invalid_fields)}")
+                return None
+
+            # 创建对象
+            return MSMOutputData(
+                farmland_id=int(row['Farmland_ID']),
+                sample_id=int(row['Sample_ID']),
+                var=row['Var:'],
+                cd_solution=row['Cd.solution']
+            )
+        except KeyError as e:
+            logger.warning(f"创建对象时缺少必要字段: {str(e)}")
+            return None
+        except Exception as e:
+            logger.warning(f"创建MSMOutputData对象失败: {str(e)}")
+            return None
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查现有数据量
+                existing_count = db.query(MSMOutputData).count()
+                logger.info(f"数据库中现有MSM输出数据记录: {existing_count} 条")
+
+                # 批量创建对象并导入
+                total_rows = len(df)
+                imported_count = 0
+                skipped_count = 0
+                invalid_count = 0
+                batch_size = 100
+                objects_to_insert = []
+
+                # 准备批量处理
+                for i, row in df.iterrows():
+                    # 提前跳过验证阶段标记为无效的数据
+                    invalid = False
+                    for col in self.required_columns:
+                        if f'{col}_invalid' in row and row[f'{col}_invalid']:
+                            invalid = True
+                            break
+
+                    if invalid:
+                        invalid_count += 1
+                        continue
+
+                    try:
+                        obj = self.create_msm_output_object(row)
+                        if not obj:
+                            skipped_count += 1
+                            continue
+
+                        objects_to_insert.append(obj)
+                        imported_count += 1
+
+                        # 每100条提交一次
+                        if len(objects_to_insert) >= batch_size:
+                            db.add_all(objects_to_insert)
+                            db.commit()
+                            logger.info(f"已批量导入 {imported_count}/{total_rows} 条数据")
+                            objects_to_insert = []
+
+                    except Exception as e:
+                        logger.warning(f"处理行 {i} 时出错: {str(e)}")
+                        skipped_count += 1
+                        db.rollback()
+
+                # 提交剩余数据
+                if objects_to_insert:
+                    db.add_all(objects_to_insert)
+                    db.commit()
+
+                # 更新统计信息
+                new_count = db.query(MSMOutputData).count()
+                added_count = new_count - existing_count
+
+                logger.info(f"MSM输出数据导入完成!")
+                logger.info(f"尝试导入行数: {total_rows}")
+                logger.info(f"成功导入: {imported_count} 条")
+                logger.info(f"跳过无效数据: {invalid_count} 条")
+                logger.info(f"处理失败: {skipped_count} 条")
+                logger.info(f"数据库中新增加: {added_count} 条记录")
+                logger.info(f"数据库总记录: {new_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始MSM输出数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("MSM输出数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 根据实际路径修改
+    sheet_name = "MSM_output"  # 确保Excel中有这个sheet
+
+    try:
+        # 创建导入器并执行导入
+        importer = MSMOutputDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 305 - 0
scripts/import_agricultural.py

@@ -0,0 +1,305 @@
+"""
+Agricultural数据导入脚本
+@description: 从Excel文件读取agricultural_data数据并导入到agricultural_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.agricultural import AgriculturalData  # 需创建对应的ORM模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class AgriculturalDataImporter:
+    """
+    农业投入品数据导入器
+
+    @description: 从Excel文件读取农业投入品数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='Agricultural'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'Agricultural'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需字段列表(根据数据库设计文档)
+        self.required_columns = [
+            'county_name', 'crop_sowing_area', 'nitrogen_usage',
+            'phosphorus_usage', 'potassium_usage', 'compound_usage',
+            'organic_usage', 'pesticide_usage', 'farmyard_usage',
+            'plastic_film_usage', 'nitrogen_cd_flux', 'phosphorus_cd_flux',
+            'potassium_cd_flux', 'compound_cd_flux', 'organic_cd_flux',
+            'pesticide_cd_flux', 'farmyard_cd_flux', 'plastic_film_cd_flux',
+            'total_cd_flux', 'data_year'
+        ]
+
+        # 数值型字段列表
+        self.numeric_columns = [
+            'crop_sowing_area', 'nitrogen_usage', 'phosphorus_usage',
+            'potassium_usage', 'compound_usage', 'organic_usage',
+            'pesticide_usage', 'farmyard_usage', 'plastic_film_usage',
+            'nitrogen_cd_flux', 'phosphorus_cd_flux', 'potassium_cd_flux',
+            'compound_cd_flux', 'organic_cd_flux', 'pesticide_cd_flux',
+            'farmyard_cd_flux', 'plastic_film_cd_flux', 'total_cd_flux',
+            'data_year'
+        ]
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 将列名统一转换为小写,以匹配数据库字段
+            df.columns = [col.lower() for col in df.columns]
+            required_columns_lower = [col.lower() for col in self.required_columns]
+            numeric_columns_lower = [col.lower() for col in self.numeric_columns]
+
+            # 检查数据类型
+            logger.info("检查数据类型...")
+
+            # 转换数值类型
+            for col in numeric_columns_lower:
+                if col in df.columns:
+                    # 对于数值列,转换为浮点数
+                    df[col] = pd.to_numeric(df[col], errors='coerce')
+
+                    # 处理特殊字段data_year(转换为整数)
+                    if col == 'data_year':
+                        df[col] = df[col].astype(pd.Int64Dtype(), errors='ignore')
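+                        # 示意(假设性示例):pd.Series([2021.0, None]).astype(pd.Int64Dtype())
+                        # 得到 [2021, <NA>],可空整型能在保留缺失值的同时按整数存储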
+
+            # 处理空值 - 所有字段必须非空(除了县市名称可能是文本)
+            empty_columns = df.isnull().any()
+            empty_cols = [col for col in empty_columns.index if empty_columns[col]]
+
+            if empty_cols:
+                logger.warning(f"发现以下列存在空值: {', '.join(empty_cols)}")
+
+                # 对于数值列,如果有空值,填充为0
+                for col in numeric_columns_lower:
+                    if col in df.columns and df[col].isnull().any():
+                        df[col] = df[col].fillna(0)
+                        logger.info(f"已将 {col} 的空值替换为0")
+
+                # 再次检查县市名称
+                if 'county_name' in df.columns and df['county_name'].isnull().any():
+                    logger.warning("县市名称存在空值,填充为'未知区域'")
+                    df['county_name'] = df['county_name'].fillna('未知区域')
+
+            # 验证逻辑关系:总镉输入通量是否等于各分项之和
+            tolerance = 1e-6
+            total_calculated = (
+                    df['nitrogen_cd_flux'] + df['phosphorus_cd_flux'] +
+                    df['potassium_cd_flux'] + df['compound_cd_flux'] +
+                    df['organic_cd_flux'] + df['pesticide_cd_flux'] +
+                    df['farmyard_cd_flux'] + df['plastic_film_cd_flux']
+            )
+
+            mismatches = abs(df['total_cd_flux'] - total_calculated) > tolerance
+
+            if mismatches.any():
+                mismatched_indices = mismatches[mismatches].index.tolist()
+                logger.warning(f"发现 {len(mismatched_indices)} 行 total_cd_flux 值与各分项之和不一致:")
+
+                for i in mismatched_indices[:5]:  # 只显示前5个示例
+                    logger.warning(f"行 {i}: total_cd_flux={df.at[i, 'total_cd_flux']}, 计算值={total_calculated[i]}")
+
+                # 用计算值覆盖原始值
+                df['total_cd_flux'] = total_calculated
+                logger.info("已自动修正 total_cd_flux 值为各分项之和")
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查是否有重复数据
+                existing_count = db.query(AgriculturalData).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 1000
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i + batch_size]
+                    batch_objects = []
+
+                    for idx, row in batch_df.iterrows():
+                        try:
+                            # 创建AgriculturalData对象
+                            agricultural_data = AgriculturalData(
+                                county_name=str(row['county_name']),
+                                crop_sowing_area=float(row['crop_sowing_area']),
+                                nitrogen_usage=float(row['nitrogen_usage']),
+                                phosphorus_usage=float(row['phosphorus_usage']),
+                                potassium_usage=float(row['potassium_usage']),
+                                compound_usage=float(row['compound_usage']),
+                                organic_usage=float(row['organic_usage']),
+                                pesticide_usage=float(row['pesticide_usage']),
+                                farmyard_usage=float(row['farmyard_usage']),
+                                plastic_film_usage=float(row['plastic_film_usage']),
+                                nitrogen_cd_flux=float(row['nitrogen_cd_flux']),
+                                phosphorus_cd_flux=float(row['phosphorus_cd_flux']),
+                                potassium_cd_flux=float(row['potassium_cd_flux']),
+                                compound_cd_flux=float(row['compound_cd_flux']),
+                                organic_cd_flux=float(row['organic_cd_flux']),
+                                pesticide_cd_flux=float(row['pesticide_cd_flux']),
+                                farmyard_cd_flux=float(row['farmyard_cd_flux']),
+                                plastic_film_cd_flux=float(row['plastic_film_cd_flux']),
+                                total_cd_flux=float(row['total_cd_flux']),
+                                data_year=int(row['data_year'])
+                            )
+                            batch_objects.append(agricultural_data)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(AgriculturalData).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始农业投入品数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("农业投入品数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 与原始文件相同
+    sheet_name = "Agricultural"  # 指定对应的sheet名称
+
+    try:
+        # 创建导入器并执行导入
+        importer = AgriculturalDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 338 - 0
scripts/import_assessment.py

@@ -0,0 +1,338 @@
+"""
+评价数据导入脚本
+@description: 从Excel文件读取Assessment评价数据并导入到Assessment表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from datetime import datetime
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.assessment import Assessment  # 确保已创建Assessment模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class AssessmentDataImporter:
+    """
+    评价数据导入器
+
+    @description: 从Excel文件读取评价数据并导入到Assessment表
+    """
+
+    def __init__(self, excel_path, sheet_name='Assessment'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'Assessment'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 用地类型映射
+        self.land_use_mapping = {
+            '旱地': 0.0,
+            '水田': 1.0,
+            '水浇地': 2.0
+        }
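+        # 示意:df['Type'].map(self.land_use_mapping) 将 '旱地'→0.0、'水田'→1.0、'水浇地'→2.0,
+        # 未出现在映射表中的取值得到 NaN,由 validate_data 统一兜底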
+
+        # 定义必需列
+        self.required_columns = [
+            'Farmland_ID',
+            'Sample_ID',
+            'Type',
+            'IDW_2023SP_Cd',
+            'IDW_2023SP_pH',
+            'SOM_IDW',
+            'safety_production_threshold',
+            'pollution_risk_screening_value'
+        ]
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 检查Farmland_ID和Sample_ID是否重复
+            duplicates = df.duplicated(subset=['Farmland_ID', 'Sample_ID'])
+            if duplicates.any():
+                dup_rows = df[duplicates]
+                logger.warning(f"发现 {len(dup_rows)} 条重复记录(基于Farmland_ID和Sample_ID)")
+                logger.info("重复记录示例:\n" + dup_rows.head().to_string())
+                # 删除重复行,保留第一个出现的
+                df = df.drop_duplicates(subset=['Farmland_ID', 'Sample_ID'], keep='first')
+                logger.info(f"删除重复记录后剩余 {len(df)} 行数据")
+
+            # 转换数值类型
+            numeric_columns = [
+                'IDW_2023SP_Cd',
+                'IDW_2023SP_pH',
+                'SOM_IDW',
+                'safety_production_threshold',
+                'pollution_risk_screening_value'
+            ]
+
+            for col in numeric_columns:
+                if col in df.columns:
+                    # 尝试转换为数值类型
+                    df[col] = pd.to_numeric(df[col], errors='coerce')
+
+                    # 检查空值
+                    if df[col].isnull().any():
+                        invalid_rows = df[df[col].isnull()]
+                        logger.warning(f"列 {col} 中有无效值,行号: {list(invalid_rows.index)}")
+
+                        # 标记为无效但保留行,稍后处理
+                        df[f'{col}_valid'] = ~df[col].isnull()
+
+            # 转换Farmland_ID和Sample_ID为整数
+            for col in ['Farmland_ID', 'Sample_ID']:
+                if col in df.columns:
+                    # 首先转换为浮点类型,再尝试转整数
+                    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(-1)
+                    df[col] = df[col].astype(int)
+
+                    # 检查无效值
+                    if (df[col] < 0).any():
+                        invalid_rows = df[df[col] < 0]
+                        logger.warning(f"列 {col} 中有无效值,行号: {list(invalid_rows.index)}")
+                        df[f'{col}_valid'] = (df[col] >= 0)
+
+            # 用地类型转换
+            if 'Type' in df.columns:
+                # 尝试直接转换为数值
+                df['Type_Numeric'] = pd.to_numeric(df['Type'], errors='coerce')
+
+                # 处理无法转换的类型
+                unknown_types = df[df['Type_Numeric'].isnull()]['Type'].unique()
+                if len(unknown_types) > 0:
+                    logger.info(f"发现未知用地类型: {unknown_types}, 尝试映射...")
+                    # 使用映射转换
+                    df['Type_Mapped'] = df['Type'].map(self.land_use_mapping)
+
+                    # 合并两种转换方式
+                    df['Final_Type'] = df['Type_Numeric'].fillna(df['Type_Mapped'])
+                else:
+                    df['Final_Type'] = df['Type_Numeric']
+
+                # 检查是否还有无效值
+                if df['Final_Type'].isnull().any():
+                    invalid_rows = df[df['Final_Type'].isnull()]
+                    logger.warning(f"列 Type 中有无法识别的值,行号: {list(invalid_rows.index)}")
+                    logger.info("为无效值设置默认值0.0(旱地)")
+                    df['Final_Type'] = df['Final_Type'].fillna(0.0)
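+                # 示意(假设性示例):Type='水田' 时 Type_Numeric=NaN、Type_Mapped=1.0,合并后 Final_Type=1.0;
+                # Type='2' 时 Type_Numeric=2.0 直接生效;两者皆缺失时回退为 0.0(旱地)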
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def create_assessment_object(self, row):
+        """
+        创建评价数据对象
+
+        @param {pd.Series} row - 数据行
+        @returns: Assessment 对象
+        """
+        try:
+            return Assessment(
+                farmland_id=row['Farmland_ID'],
+                sample_id=row['Sample_ID'],
+                type=row['Final_Type'],
+                idw_2023sp_cd=row['IDW_2023SP_Cd'],
+                idw_2023sp_ph=row['IDW_2023SP_pH'],
+                som_idw=row['SOM_IDW'],
+                safety_production_threshold=row['safety_production_threshold'],
+                pollution_risk_screening_value=row['pollution_risk_screening_value']
+            )
+        except KeyError as e:
+            logger.warning(f"创建对象时缺少必要字段: {str(e)}")
+            return None
+        except Exception as e:
+            logger.warning(f"创建Assessment对象失败: {str(e)}")
+            return None
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查现有数据量
+                existing_count = db.query(Assessment).count()
+                logger.info(f"数据库中现有评价数据记录: {existing_count} 条")
+
+                # 批量创建对象并导入
+                total_rows = len(df)
+                imported_count = 0
+                skipped_count = 0
+                invalid_count = 0
+
+                # 分批处理数据
+                for i, row in df.iterrows():
+                    try:
+                        # 检查是否有效行(所有关键字段都有效)
+                        is_valid = True
+                        for col in self.required_columns:
+                            if f'{col}_valid' in row and not row[f'{col}_valid']:
+                                is_valid = False
+                                break
+
+                        if not is_valid:
+                            invalid_count += 1
+                            logger.debug(f"跳过无效行 {i}: 存在无效值")
+                            continue
+
+                        # 创建Assessment对象
+                        assessment = self.create_assessment_object(row)
+                        if not assessment:
+                            skipped_count += 1
+                            continue
+
+                        # 添加到会话
+                        db.add(assessment)
+                        imported_count += 1
+
+                        # 每50条提交一次
+                        if imported_count % 50 == 0:
+                            db.commit()
+                            logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                    except Exception as e:
+                        logger.warning(f"导入行 {i} 时出错: {str(e)}")
+                        skipped_count += 1
+                        db.rollback()
+
+                # 提交剩余数据
+                db.commit()
+
+                # 更新统计信息
+                new_count = db.query(Assessment).count()
+                added_count = new_count - existing_count
+
+                logger.info(f"评价数据导入完成!")
+                logger.info(f"成功导入: {imported_count} 条")
+                logger.info(f"跳过无效数据: {invalid_count} 条")
+                logger.info(f"处理失败: {skipped_count} 条")
+                logger.info(f"数据库中新增加: {added_count} 条记录")
+                logger.info(f"数据库总记录: {new_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始评价数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("评价数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 根据实际路径修改
+    sheet_name = "Assessment"  # 确保Excel中有这个sheet
+
+    try:
+        # 创建导入器并执行导入
+        importer = AssessmentDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 226 - 0
scripts/import_atmo_company.py

@@ -0,0 +1,226 @@
+"""
+Atmo_company数据导入脚本
+@description: 从Excel文件读取涉重企业数据并导入到atmo_company表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from sqlalchemy.orm import sessionmaker
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.atmo_company import AtmoCompany  # 需创建对应的ORM模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class AtmoCompanyDataImporter:
+    """
+    涉重企业数据导入器
+
+    @description: 从Excel文件读取涉重企业数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='Atmo_company'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'Atmo_company'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需字段列表(根据数据库设计文档,除了ID)
+        self.required_columns = [
+            'longitude', 'latitude', 'company_name', 'company_type', 'county', 'particulate_emission'
+        ]
+
+        # 数值型字段列表
+        self.numeric_columns = ['longitude', 'latitude', 'particulate_emission']
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 将列名统一转换为小写,以匹配数据库字段
+            df.columns = [col.lower() for col in df.columns]
+
+            # 检查数据类型:将数值列转换为浮点数,无法解析的值记为NaN
+            logger.info("检查数据类型...")
+            for col in self.numeric_columns:
+                if col in df.columns:
+                    df[col] = pd.to_numeric(df[col], errors='coerce')
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查是否有重复数据
+                existing_count = db.query(AtmoCompany).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 1000
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i + batch_size]
+                    batch_objects = []
+
+                    for idx, row in batch_df.iterrows():
+                        try:
+                            # 创建AtmoCompanyData对象
+                            atmo_company = AtmoCompany(
+                                longitude=float(row['longitude']),
+                                latitude=float(row['latitude']),
+                                company_name=str(row['company_name']),
+                                company_type=str(row['company_type']),
+                                county=str(row['county']),
+                                particulate_emission=float(row['particulate_emission'])
+                            )
+                            batch_objects.append(atmo_company)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(AtmoCompany).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始涉重企业数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("涉重企业数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 与原始文件相同
+    sheet_name = "Atmo_company"  # 指定对应的sheet名称
+
+    try:
+        # 创建导入器并执行导入
+        importer = AtmoCompanyDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 299 - 0
scripts/import_atmo_sample.py

@@ -0,0 +1,299 @@
+"""
+Atmo_sample数据导入脚本
+@description: 从Excel文件读取大气颗粒物采样数据并导入到atmo_sample_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from sqlalchemy.orm import sessionmaker
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.atmo_sample import AtmoSampleData  # 需创建对应的ORM模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class AtmoSampleDataImporter:
+    """
+    大气颗粒物采样数据导入器
+
+    @description: 从Excel文件读取大气颗粒物采样数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='Atmo_sample'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'Atmo_sample'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需字段列表(根据数据库设计文档)
+        self.required_columns = [
+            'ID', 'longitude', 'latitude', 'sampling_location',
+            'start_time', 'end_time', 'cumulative_time',
+            'average_flow_rate', 'cumulative_true_volume',
+            'cumulative_standard_volume', 'sample_type',
+            'sample_name', 'Cr_particulate', 'As_particulate',
+            'Cd_particulate', 'Hg_particulate', 'Pb_particulate',
+            'particle_weight', 'standard_volume',
+            'particle_concentration', 'sample_code',
+            'temperature', 'pressure', 'humidity',
+            'wind_speed', 'wind_direction'
+        ]
+
+        # 数值型字段列表
+        self.numeric_columns = [
+            'longitude', 'latitude', 'average_flow_rate',
+            'cumulative_true_volume', 'cumulative_standard_volume',
+            'Cr_particulate', 'As_particulate', 'Cd_particulate',
+            'Hg_particulate', 'Pb_particulate', 'particle_weight',
+            'standard_volume', 'particle_concentration',
+            'temperature', 'pressure', 'humidity', 'wind_speed'
+        ]
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 检查数据类型:将数值列转换为浮点数,无法解析的值记为NaN
+            logger.info("检查数据类型...")
+            for col in self.numeric_columns:
+                if col in df.columns:
+                    df[col] = pd.to_numeric(df[col], errors='coerce')
+
+            # 验证颗粒物浓度逻辑关系
+            particle_cols = ['particle_weight', 'standard_volume', 'particle_concentration']
+            if all(col in df.columns for col in particle_cols):
+                # 计算颗粒物浓度 = 颗粒物质量(mg) * 1000 / 标准体积(m³)
+                # 因为浓度单位是ug/m³,而颗粒物质量单位是mg(1mg = 1000ug)
+                calculated_concentration = df['particle_weight'] * 1000 / df['standard_volume']
+
+                tolerance = 1e-6
+                mismatches = abs(df['particle_concentration'] - calculated_concentration) > tolerance
+
+                if mismatches.any():
+                    mismatched_indices = mismatches[mismatches].index.tolist()
+                    logger.warning(f"发现 {len(mismatched_indices)} 行颗粒物浓度值不符合逻辑:")
+
+                    for i in mismatched_indices[:5]:
+                        logger.warning(
+                            f"行 {i}: 计算值={calculated_concentration[i]}, 实际值={df.at[i, 'particle_concentration']}")
+
+                    # 用计算值覆盖原始值
+                    df['particle_concentration'] = calculated_concentration
+                    logger.info("已自动修正颗粒物浓度值")
+
+            # 处理空值
+            empty_columns = df.isnull().any()
+            empty_cols = [col for col in empty_columns.index if empty_columns[col]]
+
+            if empty_cols:
+                logger.warning(f"发现以下列存在空值: {', '.join(empty_cols)}")
+
+                # 文本列填充空值
+                text_columns = ['ID', 'sampling_location', 'start_time', 'end_time', 'cumulative_time',
+                                'sample_type', 'sample_name', 'sample_code', 'wind_direction']
+
+                for col in text_columns:
+                    if col in df.columns and df[col].isnull().any():
+                        logger.warning(f"{col}列存在空值,填充为'未知'")
+                        df[col] = df[col].fillna('未知')
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 检查是否有重复数据
+                existing_count = db.query(AtmoSampleData).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 100
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i + batch_size]
+                    batch_objects = []
+
+                    for idx, row in batch_df.iterrows():
+                        try:
+                            # 创建AtmoSampleData对象
+                            atmo_sample = AtmoSampleData(
+                                id=str(row['ID']),
+                                longitude=float(row['longitude']),
+                                latitude=float(row['latitude']),
+                                sampling_location=str(row['sampling_location']),
+                                start_time=str(row['start_time']),
+                                end_time=str(row['end_time']),
+                                cumulative_time=str(row['cumulative_time']),
+                                average_flow_rate=float(row['average_flow_rate']),
+                                cumulative_true_volume=float(row['cumulative_true_volume']),
+                                cumulative_standard_volume=float(row['cumulative_standard_volume']),
+                                sample_type=str(row['sample_type']),
+                                sample_name=str(row['sample_name']),
+                                Cr_particulate=float(row['Cr_particulate']),
+                                As_particulate=float(row['As_particulate']),
+                                Cd_particulate=float(row['Cd_particulate']),
+                                Hg_particulate=float(row['Hg_particulate']),
+                                Pb_particulate=float(row['Pb_particulate']),
+                                particle_weight=float(row['particle_weight']),
+                                standard_volume=float(row['standard_volume']),
+                                particle_concentration=float(row['particle_concentration']),
+                                sample_code=str(row['sample_code']),
+                                temperature=float(row['temperature']),
+                                pressure=float(row['pressure']),
+                                humidity=float(row['humidity']),
+                                wind_speed=float(row['wind_speed']),
+                                wind_direction=str(row['wind_direction'])
+                            )
+                            batch_objects.append(atmo_sample)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(AtmoSampleData).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始大气颗粒物采样数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("大气颗粒物采样数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 与原始文件相同
+    sheet_name = "Atmo_sample"  # 指定对应的sheet名称
+
+    try:
+        # 创建导入器并执行导入
+        importer = AtmoSampleDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 251 - 0
scripts/import_cross_section.py

@@ -0,0 +1,251 @@
+"""
+Cross_section数据导入脚本
+@description: 从Excel文件读取河流断面数据并导入到cross_section表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import engine, SessionLocal
+from app.models.cross_section import CrossSection  # 需创建对应的ORM模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class CrossSectionDataImporter:
+    """
+    河流断面数据导入器
+
+    @description: 从Excel文件读取河流断面数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='Cross_section'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'Cross_section'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需字段列表(根据数据库设计文档,除ID外)
+        self.required_columns = ['River', 'Position', 'County', 'Lon', 'Lan', 'Cd']
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 将列名统一转换为小写,以匹配数据库字段
+            df.columns = [col.lower() for col in df.columns]
+            required_columns_lower = [col.lower() for col in self.required_columns]
+
+            # 检查数据类型
+            logger.info("检查数据类型...")
+
+            # 转换数值类型
+            numeric_columns = ['lon', 'lan', 'cd']
+            for col in numeric_columns:
+                if col in df.columns:
+                    # 对于数值列,转换为浮点数
+                    df[col] = pd.to_numeric(df[col], errors='coerce')
+
+            # 处理空值 - 所有字段必须非空
+            original_count = len(df)
+            df = df.dropna(subset=required_columns_lower)
+            new_count = len(df)
+            if new_count < original_count:
+                logger.warning(f"删除空值行 {original_count - new_count} 行")
+
+            # 验证经纬度范围
+            longitude_errors = df[(df['lon'] < -180) | (df['lon'] > 180)]
+            latitude_errors = df[(df['lan'] < -90) | (df['lan'] > 90)]
+
+            if not longitude_errors.empty or not latitude_errors.empty:
+                logger.warning("发现经纬度无效值,将删除这些行")
+                # 保留有效经纬度行
+                valid_mask = (df['lon'].between(-180, 180)) & (df['lan'].between(-90, 90))
+                invalid_df = df[~valid_mask]
+                df = df[valid_mask]
+
+                logger.warning(f"删除无效经纬度数据 {len(invalid_df)} 行")
+                if not invalid_df.empty:
+                    logger.warning("部分无效行示例:")
+                    logger.warning(invalid_df[['lon', 'lan', 'position']].head().to_string())
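+                # 示意:Series.between(-180, 180) 默认为闭区间,等价于 (df['lon'] >= -180) & (df['lon'] <= 180)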
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 查询数据库中现有数据量
+                existing_count = db.query(CrossSection).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 1000
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i + batch_size]
+                    batch_objects = []
+
+                    for idx, row in batch_df.iterrows():
+                        try:
+                            # 创建CrossSection对象
+                            cross_section = CrossSection(
+                                river_name=str(row['river']),
+                                position=str(row['position']),
+                                county=str(row['county']),
+                                longitude=float(row['lon']),
+                                latitude=float(row['lan']),
+                                cd_concentration=float(row['cd'])
+                            )
+                            batch_objects.append(cross_section)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(CrossSection).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始河流断面数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("河流断面数据导入流程完成!")
+            logger.info = "=" * 60
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"
+    sheet_name = "Cross_section"
+
+    try:
+        # 创建导入器并执行导入
+        importer = CrossSectionDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
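
经纬度校验是上面 validate_data 的核心一步;下面给出一个可独立运行的最小示意(仅依赖 pandas,数据为虚构示例,列名沿用脚本里的 lon/lan 约定),便于单独验证掩码筛选逻辑:

import pandas as pd

# 构造三行示例:第二行经度越界,第三行纬度越界
df = pd.DataFrame({
    'lon': [113.6, 200.0, 112.9],
    'lan': [24.8, 24.9, -95.0],
    'cd':  [0.12, 0.30, 0.25],
})

# 与脚本中相同的有效性掩码:between 为闭区间,NaN 的比较结果为 False,会一并剔除
valid_mask = df['lon'].between(-180, 180) & df['lan'].between(-90, 90)
print(df[~valid_mask])  # 被剔除的无效行
print(df[valid_mask])   # 保留的有效行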

+ 282 - 0
scripts/import_parameters.py

@@ -0,0 +1,282 @@
+"""
+参数数据导入脚本
+@description: 从Excel文件读取Parameters参数数据并导入到Parameters表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import SessionLocal
+from app.models.parameters import Parameters  # 确保你已创建Parameters模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+class ParametersDataImporter:
+    """
+    参数数据导入器
+
+    @description: 从Excel文件读取参数数据并导入到Parameters表
+    """
+
+    def __init__(self, excel_path, sheet_name='Parameters'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'Parameters'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义参数列的默认值映射
+        self.default_values = {
+            'F1': 0.6,
+            'F2': 0.85,
+            'F3': 0.05,
+            'F4': 0.158,
+            'F5': 0.06,
+            'F6': 0.065,
+            'F7': 0.6,
+            'F8': 0.25,
+            'F9': 0.35,
+            'F10': 0.25,
+            'F11': 800,
+            'NF': 0.05,
+            'PF': 0.158,
+            'KF': 0.06,
+            'CF': 0.065,
+            'OF': 0.6,
+            'P': 0.25,
+            'FF': 0.35,
+            'AF': 0.25,
+            'Area': '韶关'
+        }
+
+        # 定义必需的最小参数列
+        self.required_columns = ['Area']
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 确保数据文件包含所有参数列
+            for column in self.default_values.keys():
+                if column not in df.columns:
+                    logger.warning(f"列 {column} 不存在于Excel文件中,将使用默认值 {self.default_values[column]}")
+                    df[column] = self.default_values[column]
+
+            # 验证数值列的数据类型
+            numeric_columns = list(self.default_values.keys())
+            numeric_columns.remove('Area')  # Area不是数值类型
+
+            for col in numeric_columns:
+                # 尝试转换为数值类型
+                df[col] = pd.to_numeric(df[col], errors='coerce')
+
+                # 检查是否有无效值
+                if df[col].isnull().any():
+                    invalid_rows = df[df[col].isnull()]
+                    logger.warning(f"列 {col} 中有无效值,行号: {list(invalid_rows.index)}")
+
+                    # 填充默认值
+                    logger.info(f"将为无效行填充默认值 {self.default_values[col]}")
+                    df[col] = df[col].fillna(self.default_values[col])
+
+            # 确保Area列是字符串类型
+            if 'Area' in df.columns:
+                df['Area'] = df['Area'].astype(str)
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 统计现有参数记录,若已有数据则先清空重导
+                existing_count = db.query(Parameters).count()
+                logger.info(f"数据库中现有参数记录: {existing_count} 条")
+
+                # 如果已有数据,先删除现有数据
+                if existing_count > 0:
+                    db.query(Parameters).delete()
+                    logger.info(f"已删除 {existing_count} 条现有参数记录")
+
+                # 获取总行数
+                total_rows = len(df)
+                imported_count = 0
+                skipped_count = 0
+
+                # 使用ORM模型创建对象并插入
+                for i, row in df.iterrows():
+                    try:
+                        # 创建参数对象
+                        param = Parameters(
+                            f1=row['F1'],
+                            f2=row['F2'],
+                            f3=row['F3'],
+                            f4=row['F4'],
+                            f5=row['F5'],
+                            f6=row['F6'],
+                            f7=row['F7'],
+                            f8=row['F8'],
+                            f9=row['F9'],
+                            f10=row['F10'],
+                            f11=row['F11'],
+                            nf=row['NF'],
+                            pf=row['PF'],
+                            kf=row['KF'],
+                            cf=row['CF'],
+                            of=row['OF'],
+                            p=row['P'],
+                            ff=row['FF'],
+                            af=row['AF'],
+                            area=row['Area']
+                        )
+
+                        # 添加到会话
+                        db.add(param)
+                        imported_count += 1
+
+                        # 每100条提交一次
+                        if imported_count % 100 == 0:
+                            db.commit()
+                            logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                    except Exception as e:
+                        # 构造对象失败时数据尚未写入会话,无需回滚;
+                        # 回滚反而会丢弃同批次中已暂存、未提交的记录
+                        logger.warning(f"跳过行 {i}: {str(e)}")
+                        skipped_count += 1
+
+                # 提交剩余的数据
+                db.commit()
+
+                logger.info(f"参数导入完成! 成功导入 {imported_count} 条数据, 跳过 {skipped_count} 条无效数据")
+
+                # 验证导入结果
+                final_count = db.query(Parameters).count()
+                logger.info(f"导入后数据库参数记录: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始参数数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("参数数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"  # 或你的文件路径
+    sheet_name = "Parameters"  # 确保你的Excel文件中有这个sheet
+
+    try:
+        # 创建导入器并执行导入
+        importer = ParametersDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
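
validate_data 中“缺列补默认值、坏值回填默认值”的处理可以抽象成一个小函数;以下为示意(defaults 为节选的示例字典,并非完整参数表):

import pandas as pd

defaults = {'F1': 0.6, 'F2': 0.85, 'Area': '韶关'}  # 示例节选

def fill_parameter_defaults(df, defaults):
    """缺失列整列补默认值;数值列先强转,无效值再回填默认值"""
    for col, default in defaults.items():
        if col not in df.columns:
            df[col] = default
        elif col != 'Area':  # Area 为文本列,不做数值转换
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(default)
    return df

df = pd.DataFrame({'F1': [0.7, '坏值'], 'Area': ['韶关', '清远']})
print(fill_parameter_defaults(df, defaults))
# 结果:F2 整列补 0.85;F1 第二行的非数值被回填为 0.6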

+ 289 - 0
scripts/import_soil_data.py

@@ -0,0 +1,289 @@
+"""
+土壤数据导入脚本
+@description: 从Excel文件读取Soil数据并导入到Soil_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import SessionLocal
+from app.models.soil import SoilData  # 假设已创建SoilData模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+class SoilDataImporter:
+    """
+    土壤数据导入器
+
+    @description: 从Excel文件读取土壤数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='Soil'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'Soil'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义默认值
+        self.default_values = {
+            'DX_Cd': 0.023,
+            'DB_Cd': 0.368
+        }
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            required_columns = ['Farmland_ID', 'Sample_ID']
+            missing_columns = [col for col in required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 检查数据类型
+            logger.info("检查数据类型...")
+
+            # 转换数值类型
+            numeric_cols = [
+                'Farmland_ID', 'Sample_ID', '0002IDW', 'bd020_90', 'POR_Layer',
+                'ExAl_IDW', 'ExCa_IDW', 'ExK_IDW', 'ExMg_IDW', 'ExNa_IDW',
+                'Fed_IDW', 'SOM_IDW', 'IDW_2013PC_Cd', 'IDW_2018XC_Cd', 'IDW_2023SP_Cd',
+                'IDW_2013PC_pH', 'IDW_2018XC_pH', 'IDW_2023SP_pH', '002_0002IDW',
+                '02_002IDW', '2_02IDW', 'AvaK_IDW', 'AvaP_IDW', 'CEC_IDW', 'EC_IDW',
+                'OC-Fe_0-30', 'SAvaK_IDW', 'TAl_IDW', 'TCa_IDW', 'TCd_IDW', 'TEB_IDW',
+                'TExH_IDW', 'TFe_IDW', 'TK_IDW', 'TMg_IDW', 'TMn_IDW', 'TN_IDW',
+                'TP_IDW', 'TS_IDW', 'DQCJ_Cd', 'GGS_Cd', 'DX_Cd', 'DB_Cd'
+            ]
+
+            # 只处理存在的列
+            existing_numeric_cols = [col for col in numeric_cols if col in df.columns]
+
+            # 转换数值类型
+            for col in existing_numeric_cols:
+                df[col] = pd.to_numeric(df[col], errors='coerce')
+
+            # 应用默认值
+            for col, default_val in self.default_values.items():
+                if col in df.columns:
+                    df[col] = df[col].fillna(default_val)
+
+            # 删除Farmland_ID或Sample_ID为空的行
+            initial_count = len(df)
+            df = df.dropna(subset=['Farmland_ID', 'Sample_ID'])
+
+            if len(df) < initial_count:
+                logger.warning(f"删除 {initial_count - len(df)} 行缺少必需值的数据")
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 查询数据库中现有数据量
+                existing_count = db.query(SoilData).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 1000
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i + batch_size]
+                    batch_objects = []
+
+                    for idx, row in batch_df.iterrows():
+                        try:
+                            # 创建SoilData对象
+                            soil_data = SoilData(
+                                farmland_id=int(row['Farmland_ID']),
+                                sample_id=int(row['Sample_ID']),
+                                clay_0002IDW=row.get('0002IDW'),
+                                bd020_90=row.get('bd020_90'),
+                                por_layer=row.get('POR_Layer'),
+                                exal_idw=row.get('ExAl_IDW'),
+                                exca_idw=row.get('ExCa_IDW'),
+                                exk_idw=row.get('ExK_IDW'),
+                                exmg_idw=row.get('ExMg_IDW'),
+                                exna_idw=row.get('ExNa_IDW'),
+                                fed_idw=row.get('Fed_IDW'),
+                                som_idw=row.get('SOM_IDW'),
+                                idw_2013pc_cd=row.get('IDW_2013PC_Cd'),
+                                idw_2018xc_cd=row.get('IDW_2018XC_Cd'),
+                                idw_2023sp_cd=row.get('IDW_2023SP_Cd'),
+                                idw_2013pc_ph=row.get('IDW_2013PC_pH'),
+                                idw_2018xc_ph=row.get('IDW_2018XC_pH'),
+                                idw_2023sp_ph=row.get('IDW_2023SP_pH'),
+                                silt_002_0002IDW=row.get('002_0002IDW'),
+                                sand_02_002IDW=row.get('02_002IDW'),
+                                gravel_2_02IDW=row.get('2_02IDW'),
+                                avak_idw=row.get('AvaK_IDW'),
+                                avap_idw=row.get('AvaP_IDW'),
+                                cec_idw=row.get('CEC_IDW'),
+                                ec_idw=row.get('EC_IDW'),
+                                oc_fe_0_30=row.get('OC-Fe_0-30'),
+                                savak_idw=row.get('SAvaK_IDW'),
+                                tal_idw=row.get('TAl_IDW'),
+                                tca_idw=row.get('TCa_IDW'),
+                                tcd_idw=row.get('TCd_IDW'),
+                                teb_idw=row.get('TEB_IDW'),
+                                texh_idw=row.get('TExH_IDW'),
+                                tfe_idw=row.get('TFe_IDW'),
+                                tk_idw=row.get('TK_IDW'),
+                                tmg_idw=row.get('TMg_IDW'),
+                                tmn_idw=row.get('TMn_IDW'),
+                                tn_idw=row.get('TN_IDW'),
+                                tp_idw=row.get('TP_IDW'),
+                                ts_idw=row.get('TS_IDW'),
+                                dqcj_cd=row.get('DQCJ_Cd'),
+                                ggs_cd=row.get('GGS_Cd'),
+                                dx_cd=row.get('DX_Cd', self.default_values['DX_Cd']),
+                                db_cd=row.get('DB_Cd', self.default_values['DB_Cd'])
+                            )
+                            batch_objects.append(soil_data)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(SoilData).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始土壤数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("土壤数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"
+    sheet_name = "Soil"
+
+    try:
+        # 创建导入器并执行导入
+        importer = SoilDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
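
几个脚本共用“分批 add_all + 按批提交”的写库模式;下面把它抽象成一个通用函数示意(row_to_obj 为调用方提供的行到 ORM 对象的转换回调,属假设的辅助函数;bulk_save_objects 是 SQLAlchemy 的批量写入接口,在不需要回填主键时通常比逐个 add 更快):

def import_in_batches(db, df, row_to_obj, batch_size=1000):
    """分批把 DataFrame 写入数据库,每批提交一次"""
    imported = 0
    for start in range(0, len(df), batch_size):
        chunk = df.iloc[start:start + batch_size]
        objects = [row_to_obj(row) for _, row in chunk.iterrows()]
        db.bulk_save_objects(objects)  # 也可用 db.add_all(objects)
        db.commit()                    # 按批提交,失败时只影响当前批
        imported += len(objects)
    return imported

调用方式与上文 import_data 内的循环等价,例如 import_in_batches(db, df, lambda row: SoilData(farmland_id=int(row['Farmland_ID']), ...)),省略号处为其余字段映射。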

+ 319 - 0
scripts/import_water_sample.py

@@ -0,0 +1,319 @@
+"""
+Water_sample数据导入脚本
+@description: 从Excel文件读取灌溉水采样数据并导入到water_sampling_data表
+"""
+
+import os
+import sys
+import pandas as pd
+import logging
+from datetime import datetime
+
+# 添加项目根目录到Python路径
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.database import SessionLocal
+from app.models.water_sample import WaterSampleData  # 需创建对应的ORM模型
+
+# 设置日志
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+class WaterSampleDataImporter:
+    """
+    灌溉水采样数据导入器
+
+    @description: 从Excel文件读取灌溉水采样数据并导入到数据库
+    """
+
+    def __init__(self, excel_path, sheet_name='Water_sample'):
+        """
+        初始化导入器
+
+        @param {str} excel_path - Excel文件路径
+        @param {str} sheet_name - Sheet名称,默认为'Water_sample'
+        """
+        self.excel_path = excel_path
+        self.sheet_name = sheet_name
+
+        # 定义必需字段列表
+        self.required_columns = [
+            'sample_code', 'lon', 'lat', 'sampling_location',
+            'sample_time', 'Cr', 'As', 'Cd', 'Hg', 'Pb', 'pH'
+        ]
+
+        # 数值型字段列表
+        self.numeric_columns = [
+            'lon', 'lat', 'sampling_volume', 'Cr', 'As', 'Cd', 'Hg', 'Pb', 'pH'
+        ]
+
+    def read_excel_data(self):
+        """
+        读取Excel文件数据
+
+        @returns: DataFrame 读取的数据
+        """
+        try:
+            logger.info(f"开始读取Excel文件: {self.excel_path}")
+            logger.info(f"Sheet名称: {self.sheet_name}")
+
+            # 检查文件是否存在
+            if not os.path.exists(self.excel_path):
+                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")
+
+            # 读取Excel文件
+            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)
+
+            logger.info(f"成功读取数据,共 {len(df)} 行")
+            logger.info(f"数据列: {list(df.columns)}")
+
+            # 显示前几行数据供确认
+            logger.info("前5行数据预览:")
+            logger.info(df.head().to_string())
+
+            return df
+
+        except Exception as e:
+            logger.error(f"读取Excel文件失败: {str(e)}")
+            raise
+
+    def clean_sample_time(self, time_str):
+        """
+        清理和标准化样本时间格式
+
+        @param {str} time_str - 时间字符串
+        @returns: datetime|None 标准化的时间对象;非字符串输入原样返回,解析失败返回None
+        """
+        try:
+            # 尝试转换常见格式
+            if isinstance(time_str, str):
+                # 处理可能的格式:2024.5.20 16:37
+                if '.' in time_str and ':' in time_str:
+                    return datetime.strptime(time_str, "%Y.%m.%d %H:%M")
+                # 处理其他格式
+                elif '/' in time_str:
+                    return datetime.strptime(time_str, "%Y/%m/%d %H:%M")
+                elif '-' in time_str:
+                    return datetime.strptime(time_str, "%Y-%m-%d %H:%M")
+            return time_str  # 非字符串(如已解析的datetime/Timestamp)原样返回
+        except Exception as e:
+            logger.warning(f"无法解析时间字符串: {time_str}, 错误: {str(e)}")
+            return None
+
+    def validate_data(self, df):
+        """
+        验证数据格式和完整性
+
+        @param {DataFrame} df - 要验证的数据
+        @returns: DataFrame 验证后的数据
+        """
+        try:
+            logger.info("开始验证数据...")
+
+            # 检查必需的列是否存在
+            missing_columns = [col for col in self.required_columns if col not in df.columns]
+
+            if missing_columns:
+                raise ValueError(f"缺少必需的列: {missing_columns}")
+
+            # 将列名转换为小写(带下划线)
+            df.columns = [col.lower().replace(' ', '_') for col in df.columns]
+            numeric_columns_lower = [col.lower().replace(' ', '_') for col in self.numeric_columns]
+
+            # 检查数据类型
+            logger.info("检查数据类型...")
+
+            # 转换数值类型
+            for col in numeric_columns_lower:
+                if col in df.columns:
+                    # 对于数值列,转换为浮点数
+                    df[col] = pd.to_numeric(df[col], errors='coerce')
+
+            # 特殊处理时间字段
+            if 'sample_time' in df.columns:
+                # 清理和标准化时间格式
+                df['sample_time'] = df['sample_time'].apply(self.clean_sample_time)
+
+            # 处理空值 - 所有字段必须非空(除了文本描述)
+            empty_cols = df.columns[df.isnull().any()].tolist()
+
+            if empty_cols:
+                logger.warning(f"发现以下列存在空值: {', '.join(empty_cols)}")
+
+                # 对于数值列,如果有空值,填充为0
+                for col in numeric_columns_lower:
+                    if col in df.columns and df[col].isnull().any():
+                        df[col] = df[col].fillna(0)
+                        logger.info(f"已将 {col} 的空值替换为0")
+
+                # 文本列填充空值为'未知'
+                text_columns = [
+                    'sample_number', 'weather', 'storage_container_material',
+                    'storage_container_color', 'sample_description',
+                    'water_quality', 'water_environment',
+                    'storage_method'
+                ]
+
+                for col in text_columns:
+                    if col in df.columns and df[col].isnull().any():
+                        logger.warning(f"{col}列存在空值,填充为'未知'")
+                        df[col] = df[col].fillna('未知')
+
+            # 验证经纬度范围
+            longitude_errors = df[(df['lon'] < -180) | (df['lon'] > 180) | (df['lon'].isna())]
+            latitude_errors = df[(df['lat'] < -90) | (df['lat'] > 90) | (df['lat'].isna())]
+
+            if not longitude_errors.empty or not latitude_errors.empty:
+                logger.warning("发现经纬度无效值或空值,将删除这些行")
+                # 保留有效经纬度行
+                valid_mask = ~df['lon'].isna() & ~df['lat'].isna() & (df['lon'].between(-180, 180)) & (df['lat'].between(-90, 90))
+                invalid_df = df[~valid_mask]
+                df = df[valid_mask]
+
+                logger.warning(f"删除无效经纬度数据 {len(invalid_df)} 行")
+                if not invalid_df.empty:
+                    logger.warning("部分无效行示例:")
+                    logger.warning(invalid_df[['lon', 'lat', 'sampling_location']].head().to_string())
+
+            logger.info(f"数据验证完成,有效数据 {len(df)} 行")
+
+            return df
+
+        except Exception as e:
+            logger.error(f"数据验证失败: {str(e)}")
+            raise
+
+    def import_data(self, df):
+        """
+        将数据导入到数据库
+
+        @param {DataFrame} df - 要导入的数据
+        """
+        try:
+            logger.info("开始导入数据到数据库...")
+
+            # 创建数据库会话
+            db = SessionLocal()
+
+            try:
+                # 查询数据库中现有数据量
+                existing_count = db.query(WaterSampleData).count()
+                logger.info(f"数据库中现有数据: {existing_count} 条")
+
+                # 批量创建对象
+                batch_size = 500  # 由于字段较多,适当减小批量大小
+                total_rows = len(df)
+                imported_count = 0
+
+                for i in range(0, total_rows, batch_size):
+                    batch_df = df.iloc[i:i+batch_size]
+                    batch_objects = []
+
+                    for idx, row in batch_df.iterrows():
+                        try:
+                            # 创建WaterSampleData对象
+                            water_sample = WaterSampleData(
+                                sample_code=str(row['sample_code']),
+                                sample_number=str(row['sample_number']),
+                                longitude=float(row['lon']),
+                                latitude=float(row['lat']),
+                                sampling_location=str(row['sampling_location']),
+                                sample_time=row['sample_time'],  # 作为datetime对象存储
+                                weather=str(row['weather']),
+                                container_material=str(row['storage_container_material']),
+                                container_color=str(row['storage_container_color']),
+                                container_capacity=int(row['storage_container_capacity']),
+                                sampling_volume=float(row['sampling_volume']),
+                                sample_description=str(row['sample_description']),
+                                water_quality=str(row['water_quality']),
+                                water_environment=str(row['water_environment']),
+                                storage_method=str(row['storage_method']),
+                                cr_concentration=float(row['cr']),
+                                as_concentration=float(row['as']),
+                                cd_concentration=float(row['cd']),
+                                hg_concentration=float(row['hg']),
+                                pb_concentration=float(row['pb']),
+                                ph_value=float(row['ph'])
+                            )
+                            batch_objects.append(water_sample)
+
+                        except Exception as e:
+                            logger.warning(f"跳过行 {i + _}: {str(e)}")
+                            continue
+
+                    if batch_objects:
+                        # 批量插入
+                        db.add_all(batch_objects)
+                        db.commit()
+                        imported_count += len(batch_objects)
+                        logger.info(f"已导入 {imported_count}/{total_rows} 条数据")
+
+                logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")
+
+                # 验证导入结果
+                final_count = db.query(WaterSampleData).count()
+                logger.info(f"导入后数据库总数据: {final_count} 条")
+
+            except Exception as e:
+                db.rollback()
+                logger.error(f"数据导入失败,已回滚: {str(e)}")
+                raise
+            finally:
+                db.close()
+
+        except Exception as e:
+            logger.error(f"数据导入过程失败: {str(e)}")
+            raise
+
+    def run_import(self):
+        """
+        执行完整的导入流程
+        """
+        try:
+            logger.info("=" * 60)
+            logger.info("开始灌溉水采样数据导入流程")
+            logger.info("=" * 60)
+
+            # 1. 读取Excel数据
+            df = self.read_excel_data()
+
+            # 2. 验证数据
+            df = self.validate_data(df)
+
+            # 3. 导入数据
+            self.import_data(df)
+
+            logger.info("=" * 60)
+            logger.info("灌溉水采样数据导入流程完成!")
+            logger.info("=" * 60)
+
+        except Exception as e:
+            logger.error(f"导入流程失败: {str(e)}")
+            raise
+
+def main():
+    """
+    主函数
+    """
+    # Excel文件路径
+    excel_path = r"D:\destkop\数据库对应数据.xlsx"
+    sheet_name = "Water_sample"  # 指定对应的sheet名称
+
+    try:
+        # 创建导入器并执行导入
+        importer = WaterSampleDataImporter(excel_path, sheet_name)
+        importer.run_import()
+
+    except Exception as e:
+        logger.error(f"程序执行失败: {str(e)}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
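
clean_sample_time 按分隔符逐一尝试 strptime;另一种常见写法是先统一分隔符,再交给 pd.to_datetime 推断格式,无法解析的值统一置为 NaT,便于后续过滤。以下为示意(示例数据为虚构):

import pandas as pd

def parse_sample_time(series):
    """把 '.'、'/' 分隔的日期统一成 '-' 后解析;解析失败置为 NaT"""
    normalized = (series.astype(str)
                        .str.replace('.', '-', regex=False)
                        .str.replace('/', '-', regex=False))
    return pd.to_datetime(normalized, errors='coerce')

s = pd.Series(['2024.5.20 16:37', '2024/05/21 09:00', '无效时间'])
print(parse_sample_time(s))
# 前两行解析为 Timestamp,第三行为 NaT,可据此删除或告警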