|
@@ -0,0 +1,252 @@
|
|
|
+"""
|
|
|
+农田数据导入脚本
|
|
|
+@description: 从Excel文件读取Farmland数据并导入到Farmland_data表
|
|
|
+"""
|
|
|
+
|
|
|
+import os
|
|
|
+import sys
|
|
|
+import pandas as pd
|
|
|
+import logging
|
|
|
+from datetime import datetime
|
|
|
+from sqlalchemy.orm import sessionmaker
|
|
|
+from geoalchemy2 import WKTElement
|
|
|
+
|
|
|
+# 添加项目根目录到Python路径
|
|
|
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
+
|
|
|
+from app.database import engine, SessionLocal
|
|
|
+from app.models.farmland import FarmlandData
|
|
|
+
|
|
|
# Configure logging for the script: INFO threshold, with each record showing
# its timestamp, logger name, level and message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger shared by the importer class and main().
logger = logging.getLogger(__name__)
|
|
|
+
|
|
|
class FarmlandDataImporter:
    """Import farmland records from an Excel sheet into the database.

    The workflow is: read the sheet with pandas, coerce and validate the
    required columns, then insert ``FarmlandData`` rows in batches.
    """

    # Columns that must be present in the sheet.
    REQUIRED_COLUMNS = ('Farmland_ID', 'Sample_ID', 'lon', 'lan', 'Type')
    # Subset of the required columns that must hold numeric values.
    NUMERIC_COLUMNS = ('Farmland_ID', 'Sample_ID', 'lon', 'lan')

    def __init__(self, excel_path: str, sheet_name: str = 'Farmland',
                 batch_size: int = 1000):
        """Store the import settings.

        Args:
            excel_path: Path of the Excel workbook to read.
            sheet_name: Name of the sheet to read; defaults to ``'Farmland'``.
            batch_size: Number of rows inserted per commit; defaults to 1000.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size 必须为正整数: {batch_size}")

        self.excel_path = excel_path
        self.sheet_name = sheet_name
        self.batch_size = batch_size
        # Maps the textual ``Type`` column to the numeric code that is stored.
        self.type_mapping = {
            '旱': 0.0,
            '水田': 1.0,
            '水浇地': 2.0,
        }

    def read_excel_data(self) -> pd.DataFrame:
        """Read the configured sheet of the workbook.

        Returns:
            The sheet contents as a DataFrame.

        Raises:
            FileNotFoundError: If ``excel_path`` does not exist.
            Exception: Any error raised by ``pandas.read_excel`` is logged and
                re-raised unchanged.
        """
        try:
            logger.info(f"开始读取Excel文件: {self.excel_path}")
            logger.info(f"Sheet名称: {self.sheet_name}")

            # Fail early with a clear message when the file is missing.
            if not os.path.exists(self.excel_path):
                raise FileNotFoundError(f"Excel文件不存在: {self.excel_path}")

            df = pd.read_excel(self.excel_path, sheet_name=self.sheet_name)

            logger.info(f"成功读取数据,共 {len(df)} 行")
            logger.info(f"数据列: {list(df.columns)}")

            # Log a short preview so the operator can confirm the input.
            logger.info("前5行数据预览:")
            logger.info(df.head().to_string())

            return df

        except Exception as e:
            logger.error(f"读取Excel文件失败: {str(e)}")
            raise

    def validate_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Check the required columns and normalise their values.

        Numeric columns are coerced with ``pandas.to_numeric``; rows where any
        of them cannot be converted are dropped. The ``Type`` column is mapped
        through ``self.type_mapping`` into a new ``Type_Numeric`` column, and
        values missing from the mapping become ``0.0``.

        The input frame is not modified; a cleaned copy is returned.

        Args:
            df: Raw data as read from the sheet.

        Returns:
            A new DataFrame holding only the valid rows, with the additional
            ``Type_Numeric`` column. Original index labels are preserved.

        Raises:
            ValueError: If any required column is missing.
        """
        try:
            logger.info("开始验证数据...")

            missing_columns = [col for col in self.REQUIRED_COLUMNS if col not in df.columns]
            if missing_columns:
                raise ValueError(f"缺少必需的列: {missing_columns}")

            logger.info("检查数据类型...")

            # Work on a copy: the caller's frame stays untouched and the
            # column assignments below never write into a view of it.
            df = df.copy()
            numeric_columns = list(self.NUMERIC_COLUMNS)
            for col in numeric_columns:
                # Unparseable values become NaN and are filtered out below.
                df[col] = pd.to_numeric(df[col], errors='coerce')

            invalid_mask = df[numeric_columns].isnull().any(axis=1)
            if invalid_mask.any():
                logger.warning("发现无效的数值,将跳过这些行")
                logger.warning(f"无效行数: {int(invalid_mask.sum())}")
                # Explicit copy so adding Type_Numeric is not a chained assignment.
                df = df.loc[~invalid_mask].copy()

            logger.info("转换Type字段...")
            df['Type_Numeric'] = df['Type'].map(self.type_mapping)

            # Values absent from the mapping (including empty cells) map to NaN.
            unknown_mask = df['Type_Numeric'].isnull()
            if unknown_mask.any():
                unknown_types = df.loc[unknown_mask, 'Type'].unique()
                logger.warning(f"发现未知的Type值: {unknown_types}")
                logger.warning("将为未知Type设置默认值0.0(旱地)")
                df['Type_Numeric'] = df['Type_Numeric'].fillna(0.0)

            logger.info(f"数据验证完成,有效数据 {len(df)} 行")

            return df

        except Exception as e:
            logger.error(f"数据验证失败: {str(e)}")
            raise

    def create_geometry(self, lon, lat):
        """Build a point geometry from a longitude/latitude pair.

        Args:
            lon: Longitude, written first in the WKT text.
            lat: Latitude, written second in the WKT text.

        Returns:
            A ``WKTElement`` for ``POINT(lon lat)`` with SRID 4326.
        """
        return WKTElement(f'POINT({lon} {lat})', srid=4326)

    def _build_batch(self, batch_df):
        """Convert one slice of the validated frame into ``FarmlandData`` objects.

        Rows that fail conversion are logged and skipped, so a single bad row
        does not abort the batch.
        """
        batch_objects = []
        for row_label, row in batch_df.iterrows():
            try:
                lon = float(row['lon'])
                lat = float(row['lan'])
                batch_objects.append(
                    FarmlandData(
                        farmland_id=int(row['Farmland_ID']),
                        sample_id=int(row['Sample_ID']),
                        lon=lon,
                        lan=lat,
                        type=float(row['Type_Numeric']),
                        geom=self.create_geometry(lon, lat),
                    )
                )
            except Exception as e:
                # iterrows() yields the index label, which already identifies
                # the row; adding the batch offset to it would be wrong (and
                # would raise for a non-integer index).
                logger.warning(f"跳过行 {row_label}: {str(e)}")
        return batch_objects

    def import_data(self, df):
        """Insert the validated rows into the database in batches.

        Each batch of ``self.batch_size`` rows is added and committed on its
        own. On failure the session is rolled back and the error re-raised;
        batches committed before the failure are not undone by that rollback.
        The session is always closed.

        Args:
            df: Validated data, as returned by ``validate_data``.
        """
        logger.info("开始导入数据到数据库...")

        db = SessionLocal()
        try:
            existing_count = db.query(FarmlandData).count()
            logger.info(f"数据库中现有数据: {existing_count} 条")

            total_rows = len(df)
            imported_count = 0

            for start in range(0, total_rows, self.batch_size):
                batch_objects = self._build_batch(df.iloc[start:start + self.batch_size])
                if batch_objects:
                    db.add_all(batch_objects)
                    db.commit()
                    imported_count += len(batch_objects)
                    logger.info(f"已导入 {imported_count}/{total_rows} 条数据")

            logger.info(f"数据导入完成! 成功导入 {imported_count} 条数据")

            # Report the table size after the import for a quick sanity check.
            final_count = db.query(FarmlandData).count()
            logger.info(f"导入后数据库总数据: {final_count} 条")

        except Exception as e:
            db.rollback()
            logger.error(f"数据导入失败,已回滚: {str(e)}")
            raise
        finally:
            db.close()

    def run_import(self):
        """Run the full pipeline: read the sheet, validate it, import it.

        Any exception from a step is logged and re-raised.
        """
        try:
            logger.info("=" * 60)
            logger.info("开始农田数据导入流程")
            logger.info("=" * 60)

            # 1. Read the Excel data.
            df = self.read_excel_data()

            # 2. Validate and normalise it.
            df = self.validate_data(df)

            # 3. Write it to the database.
            self.import_data(df)

            logger.info("=" * 60)
            logger.info("农田数据导入流程完成!")
            logger.info("=" * 60)

        except Exception as e:
            logger.error(f"导入流程失败: {str(e)}")
            raise
|
|
|
+
|
|
|
def main(argv=None):
    """Run the farmland import as a command-line program.

    Args:
        argv: Optional list of command-line arguments (without the program
            name): ``[excel_path] [sheet_name]``. When omitted, the built-in
            default workbook path and the ``Farmland`` sheet are used, so a
            bare ``main()`` call behaves as before.

    Exits the process with status 1 if the import fails.
    """
    import argparse  # local import: only the CLI entry point needs it

    parser = argparse.ArgumentParser(description="农田数据导入脚本")
    parser.add_argument(
        "excel_path",
        nargs="?",
        # Historical default location of the workbook.
        default=r"C:\Users\drzha\Desktop\0614\数据库对应数据.xlsx",
        help="path of the Excel workbook to import",
    )
    parser.add_argument(
        "sheet_name",
        nargs="?",
        default="Farmland",
        help="name of the sheet to read",
    )
    args = parser.parse_args([] if argv is None else list(argv))

    try:
        # Build the importer and run the whole pipeline.
        importer = FarmlandDataImporter(args.excel_path, args.sheet_name)
        importer.run_import()

    except Exception as e:
        logger.error(f"程序执行失败: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main(sys.argv[1:])
|