1 week ago · 2fd21973ab
--- a/app/api/water.py
+++ b/app/api/water.py
@@ -127,7 +127,8 @@ async def recalculate_land_data(
 
															         enable_interpolation: Optional[bool] = Form(False, description="是否启用空间插值，默认启用"),
														
 
															         interpolation_method: Optional[str] = Form("linear", description="插值方法: nearest | linear | cubic"),
														
 
															         resolution_factor: Optional[float] = Form(4.0, description="分辨率因子，默认4.0，越大分辨率越高"),
														
 
															-        save_csv: Optional[bool] = Form(True, description="是否生成CSV文件，默认生成")
														
 
															+        save_csv: Optional[bool] = Form(True, description="是否生成CSV文件，默认生成"),
														
 
															+        cleanup_temp_files: Optional[bool] = Form(True, description="是否清理临时文件，默认清理")
														
 
															 ) -> Dict[str, Any]:
														
 
															     """重新计算土地数据并返回结果路径，支持动态边界控制和插值控制"""
														
 
															     try:
														
@@ -161,7 +162,8 @@ async def recalculate_land_data(
 
															             enable_interpolation=enable_interpolation,
														
 
															             interpolation_method=interpolation_method,
														
 
															             resolution_factor=resolution_factor,
														
 
															-            save_csv=save_csv  # 将CSV生成选项传递给处理函数
														
 
															+            save_csv=save_csv,  # 将CSV生成选项传递给处理函数
														
 
															+            cleanup_temp_files=cleanup_temp_files  # 将清理选项传递给处理函数
														
 
															         )
														
 
															         if not results:
														
--- a/app/services/water_service.py
+++ b/app/services/water_service.py
@@ -4,12 +4,14 @@ import pandas as pd
 
															 from pyproj import Transformer
														
 
															 from shapely.geometry import Point
														
 
															 import rasterio
														
 
															-from typing import Optional, Dict, Any
														
 
															+from typing import Optional, Dict, Any, List
														
 
															 from datetime import datetime
														
 
															 import numpy as np
														
 
															 import logging
														
 
															 import shutil
														
 
															 import sys
														
 
															+from sklearn.neighbors import BallTree
														
 
															+from time import time
														
 
															 # 导入MappingUtils
														
 
															 from ..utils.mapping_utils import MappingUtils, csv_to_raster_workflow, dataframe_to_raster_workflow
														
@@ -86,7 +88,116 @@ def get_boundary_gdf_from_database(area: str, level: str) -> Optional[gpd.GeoDat
 
															     return None
														
 
															+def find_nearest_sampling_points_optimized(land_centers_df: pd.DataFrame, 
														
 
															+                                          sampling_points_df: pd.DataFrame) -> np.ndarray:
														
 
															+    """
														
 
															+    使用BallTree高效计算每个土地中心点的最近采样点
														
 
															+    
														
 
															+    @description: 使用空间索引优化最近邻搜索，将O(n×m)复杂度降低到O(n×log(m))
														
 
															+    
														
 
															+    @param land_centers_df: 土地中心点数据，包含center_lon和center_lat列
														
 
															+    @param sampling_points_df: 采样点数据，包含经度和纬度列
														
 
															+    @returns: 每个土地中心点对应的最近采样点索引数组
														
 
															+    """
														
 
															+    logger.info("开始构建空间索引优化最近邻搜索...")
														
 
															+    
														
 
															+    start_time = time()
														
 
															+    
														
 
															+    # 1. 准备采样点坐标数据（转换为弧度用于BallTree）
														
 
															+    sampling_coords = np.radians(sampling_points_df[['经度', '纬度']].values)
														
 
															+    
														
 
															+    # 2. 构建BallTree空间索引
														
 
															+    logger.info(f"构建BallTree索引，采样点数量: {len(sampling_coords)}")
														
 
															+    tree = BallTree(sampling_coords, metric='haversine')
														
 
															+    
														
 
															+    # 3. 准备土地中心点坐标数据
														
 
															+    land_coords = np.radians(land_centers_df[['center_lon', 'center_lat']].values)
														
 
															+    
														
 
															+    # 4. 批量查询最近邻（k=1表示只找最近的一个点）
														
 
															+    logger.info(f"批量查询最近邻，土地中心点数量: {len(land_coords)}")
														
 
															+    distances, indices = tree.query(land_coords, k=1)
														
 
															+    
														
 
															+    # 5. 提取索引（indices是二维数组，我们只需要第一列）
														
 
															+    nearest_indices = indices.flatten()
														
 
															+    
														
 
															+    elapsed_time = time() - start_time
														
 
															+    logger.info(f"空间索引搜索完成，耗时: {elapsed_time:.2f}秒")
														
 
															+    logger.info(f"平均每个点查询时间: {elapsed_time/len(land_coords)*1000:.2f}毫秒")
														
 
															+    
														
 
															+    return nearest_indices
														
 
															+def cleanup_temporary_files(*file_paths):
														
 
															+    """
														
 
															+    清理临时文件
														
 
															+    
														
 
															+    @description: 安全地删除指定的临时文件，支持多种文件类型
														
 
															+    @param file_paths: 要删除的文件路径（可变参数）
														
 
															+    """
														
 
															+    import tempfile
														
 
															+    
														
 
															+    for file_path in file_paths:
														
 
															+        if not file_path:
														
 
															+            continue
														
 
															+            
														
 
															+        try:
														
 
															+            if os.path.exists(file_path) and os.path.isfile(file_path):
														
 
															+                os.remove(file_path)
														
 
															+                logger.info(f"已清理临时文件: {os.path.basename(file_path)}")
														
 
															+                
														
 
															+                # 如果是shapefile，也删除相关的配套文件
														
 
															+                if file_path.endswith('.shp'):
														
 
															+                    base_path = os.path.splitext(file_path)[0]
														
 
															+                    for ext in ['.shx', '.dbf', '.prj', '.cpg']:
														
 
															+                        related_file = base_path + ext
														
 
															+                        if os.path.exists(related_file):
														
 
															+                            os.remove(related_file)
														
 
															+                            logger.info(f"已清理相关文件: {os.path.basename(related_file)}")
														
 
															+                            
														
 
															+        except Exception as e:
														
 
															+            logger.warning(f"清理文件失败 {file_path}: {str(e)}")
														
 
															+
														
 
															+
														
 
															+def cleanup_temp_files_in_directory(directory: str, patterns: List[str] = None) -> int:
														
 
															+    """
														
 
															+    清理指定目录下的临时文件
														
 
															+    
														
 
															+    @description: 根据文件名模式清理目录中的临时文件
														
 
															+    @param directory: 要清理的目录路径
														
 
															+    @param patterns: 文件名模式列表，默认为['memory_raster_', 'temp_', 'tmp_']
														
 
															+    @returns: 清理的文件数量
														
 
															+    """
														
 
															+    if patterns is None:
														
 
															+        patterns = ['memory_raster_', 'temp_', 'tmp_']
														
 
															+    
														
 
															+    if not os.path.exists(directory) or not os.path.isdir(directory):
														
 
															+        logger.warning(f"目录不存在或不是有效目录: {directory}")
														
 
															+        return 0
														
 
															+    
														
 
															+    cleaned_count = 0
														
 
															+    
														
 
															+    try:
														
 
															+        for filename in os.listdir(directory):
														
 
															+            file_path = os.path.join(directory, filename)
														
 
															+            
														
 
															+            # 检查是否是文件
														
 
															+            if not os.path.isfile(file_path):
														
 
															+                continue
														
 
															+                
														
 
															+            # 检查文件名是否匹配任何模式
														
 
															+            should_clean = any(pattern in filename for pattern in patterns)
														
 
															+            
														
 
															+            if should_clean:
														
 
															+                try:
														
 
															+                    os.remove(file_path)
														
 
															+                    logger.info(f"已清理临时文件: {filename}")
														
 
															+                    cleaned_count += 1
														
 
															+                except Exception as e:
														
 
															+                    logger.warning(f"清理文件失败 {filename}: {str(e)}")
														
 
															+                    
														
 
															+    except Exception as e:
														
 
															+        logger.error(f"清理目录失败 {directory}: {str(e)}")
														
 
															+        
														
 
															+    return cleaned_count
														
 
															 # 土地数据处理函数
														
@@ -107,6 +218,7 @@ def process_land_data(land_type, coefficient_params=None, save_csv=True):
 
															         return None, None, None
														
 
															     logger.info(f"从数据库获取到 {len(land_centers_df)} 个 '{land_type}' 类型的土地数据")
														
 
															+    logger.info(f"预计需要进行 {len(land_centers_df)} 次最近邻搜索，使用高性能算法处理...")
														
 
															     # 读取Excel采样点数据
														
 
															     if not os.path.exists(xls_file):
														
@@ -128,26 +240,20 @@ def process_land_data(land_type, coefficient_params=None, save_csv=True):
 
															     Num = param1 * param2
														
 
															     logger.info(f"系数: {param1} * {param2} = {Num}")
														
 
															-    # 处理每个面要素，使用数据库中的中心点坐标
														
 
															-    cd_values = []
														
 
															-    centers = []
														
 
															+    # 高效处理：使用空间索引查找最近采样点
														
 
															+    logger.info("开始高效距离计算和Cd值计算...")
														
 
															+    start_time = time()
														
 
															-    for index, row in land_centers_df.iterrows():
														
 
															-        center_lon = row['center_lon']
														
 
															-        center_lat = row['center_lat']
														
 
															-        centers.append((center_lon, center_lat))
														
 
															-
														
 
															-        # 计算到所有采样点的距离
														
 
															-        distances = df_xls.apply(
														
 
															-            lambda x: Point(center_lon, center_lat).distance(Point(x['经度'], x['纬度'])),
														
 
															-            axis=1
														
 
															-        )
														
 
															-        min_idx = distances.idxmin()
														
 
															-        nearest = df_xls.loc[min_idx]
														
 
															-
														
 
															-        # 计算Cd含量值
														
 
															-        cd_value = nearest['Cd (ug/L)'] * Num
														
 
															-        cd_values.append(cd_value)
														
 
															+    # 使用优化的空间索引方法查找最近采样点
														
 
															+    nearest_indices = find_nearest_sampling_points_optimized(land_centers_df, df_xls)
														
 
															+    
														
 
															+    # 批量计算Cd含量值
														
 
															+    centers = list(zip(land_centers_df['center_lon'], land_centers_df['center_lat']))
														
 
															+    cd_values = df_xls.iloc[nearest_indices]['Cd (ug/L)'].values * Num
														
 
															+    
														
 
															+    calculation_time = time() - start_time
														
 
															+    logger.info(f"Cd值计算完成，耗时: {calculation_time:.2f}秒")
														
 
															+    logger.info(f"处理了 {len(centers)} 个土地中心点")
														
 
															     # 创建简化数据DataFrame
														
 
															     simplified_data = pd.DataFrame({
														
@@ -297,7 +403,8 @@ def process_land_to_visualization(land_type, coefficient_params=None,
 
															                                   enable_interpolation: Optional[bool] = True,
														
 
															                                   interpolation_method: Optional[str] = "linear",
														
 
															                                   resolution_factor: Optional[float] = 4.0,
														
 
															-                                  save_csv: Optional[bool] = True):
														
 
															+                                  save_csv: Optional[bool] = True,
														
 
															+                                  cleanup_temp_files: Optional[bool] = True):
														
 
															     """
														
 
															     完整的土地数据处理可视化流程（使用统一的MappingUtils接口，支持动态边界和插值控制）
														
@@ -321,6 +428,7 @@ def process_land_to_visualization(land_type, coefficient_params=None,
 
															     @param interpolation_method: 插值方法，nearest | linear | cubic，默认linear
														
 
															     @param resolution_factor: 分辨率因子，默认4.0，越大分辨率越高
														
 
															     @param save_csv: 是否生成CSV文件，默认True
														
 
															+    @param cleanup_temp_files: 是否清理临时文件，默认True
														
 
															     @returns: 包含所有生成文件路径的元组
														
 
															     """
														
 
															     base_dir = get_base_dir()
														
@@ -441,6 +549,38 @@ def process_land_to_visualization(land_type, coefficient_params=None,
 
															         data_dir = os.path.join(base_dir, "..", "static", "water", "Data")
														
 
															         cleaned_csv = os.path.join(data_dir, f"中心点经纬度与预测值&{land_type}_清洗.csv")
														
 
															+    # 清理临时文件（如果启用）
														
 
															+    if cleanup_temp_files:
														
 
															+        logger.info("开始清理临时文件...")
														
 
															+        
														
 
															+        # 要清理的临时文件列表
														
 
															+        temp_files_to_cleanup = []
														
 
															+        
														
 
															+        # 添加临时栅格文件（如果是memory_raster_开头的）
														
 
															+        if output_tif and 'memory_raster_' in os.path.basename(output_tif):
														
 
															+            temp_files_to_cleanup.append(output_tif)
														
 
															+            
														
 
															+        # 添加临时shapefile（如果存在且是临时文件）
														
 
															+        temp_shapefile = workflow_result.get('shapefile')
														
 
															+        if temp_shapefile and ('temp' in temp_shapefile.lower() or 'memory' in temp_shapefile.lower()):
														
 
															+            temp_files_to_cleanup.append(temp_shapefile)
														
 
															+        
														
 
															+        # 如果不保存CSV，也清理CSV文件
														
 
															+        if not save_csv and cleaned_csv_path and os.path.exists(cleaned_csv_path):
														
 
															+            temp_files_to_cleanup.append(cleaned_csv_path)
														
 
															+            
														
 
															+        # 执行清理
														
 
															+        if temp_files_to_cleanup:
														
 
															+            cleanup_temporary_files(*temp_files_to_cleanup)
														
 
															+            logger.info(f"已清理 {len(temp_files_to_cleanup)} 个临时文件")
														
 
															+            
														
 
															+            # 如果清理了栅格文件，将返回路径设为None以避免引用已删除的文件
														
 
															+            if output_tif in temp_files_to_cleanup:
														
 
															+                output_tif = None
														
 
															+                logger.info("注意：临时栅格文件已被清理，返回的栅格路径为None")
														
 
															+        else:
														
 
															+            logger.info("没有临时文件需要清理")
														
 
															+    
														
 
															     return cleaned_csv, workflow_result['shapefile'], output_tif, map_output, hist_output, used_coeff
														
@@ -544,6 +684,13 @@ def main():
 
															     except Exception as e:
														
 
															         logger.error(f"处理过程中发生错误: {str(e)}", exc_info=True)
														
 
															     finally:
														
 
															+        # 清理临时文件
														
 
															+        base_dir = get_base_dir()
														
 
															+        raster_dir = os.path.join(base_dir, "..", "static", "water", "Raster")
														
 
															+        cleaned_count = cleanup_temp_files_in_directory(raster_dir)
														
 
															+        if cleaned_count > 0:
														
 
															+            logger.info(f"已清理 {cleaned_count} 个临时文件")
														
 
															+        
														
 
															         logger.info("处理完成")