|
@@ -869,4 +869,287 @@ class CdPredictionService:
|
|
|
return max(files, key=os.path.getctime)
|
|
|
return None
|
|
|
except Exception:
|
|
|
- return None
|
|
|
+ return None
|
|
|
+
|
|
|
+ # =============================================================================
|
|
|
+ # 统计信息方法
|
|
|
+ # =============================================================================
|
|
|
+
|
|
|
def get_crop_cd_statistics(self, county_name: str) -> Optional[Dict[str, Any]]:
    """
    Build summary statistics for the crop Cd prediction results.

    Reads ``data/final/Final_predictions_crop_cd.csv`` under the configured
    Cd system path and summarises its ``Prediction`` column. Values that are
    missing or not numeric are ignored. A statistic that is undefined for the
    sample (for example the standard deviation of a single point) is reported
    as ``None`` instead of NaN so the result can be serialised as strict JSON.

    @param {str} county_name - County/city name; it is only echoed into the result.
    @returns {Optional[Dict[str, Any]]} Statistics dict, or None when the file,
        the 'Prediction' column or every usable value is missing, or when
        reading/processing raises.
    """
    try:
        import math

        def _finite_or_none(value: Any) -> Optional[float]:
            # NaN/inf have no strict-JSON representation; expose them as None.
            number = float(value)
            return number if math.isfinite(number) else None

        # NOTE(review): the path does not depend on county_name, so every
        # county is served from the same file -- confirm this is intended.
        cd_system_path = self.config.get_cd_system_path()
        final_data_path = os.path.join(cd_system_path, "data", "final", "Final_predictions_crop_cd.csv")

        if not os.path.exists(final_data_path):
            self.logger.warning(f"未找到作物Cd预测结果文件: {final_data_path}")
            return None

        df = pd.read_csv(final_data_path)

        if 'Prediction' not in df.columns:
            self.logger.warning("预测结果文件中缺少'Prediction'列")
            return None

        # Coerce unparsable entries to NaN and drop them so they are neither
        # counted as data points nor allowed to break the histogram step.
        predictions = pd.to_numeric(df['Prediction'], errors='coerce').dropna()

        if predictions.empty:
            self.logger.warning(f"预测结果文件中没有有效的预测值: {final_data_path}")
            return None

        basic_stats = {
            "数据点总数": len(predictions),
            "均值": _finite_or_none(predictions.mean()),
            "中位数": _finite_or_none(predictions.median()),
            "标准差": _finite_or_none(predictions.std()),
            "最小值": _finite_or_none(predictions.min()),
            "最大值": _finite_or_none(predictions.max()),
            "25%分位数": _finite_or_none(predictions.quantile(0.25)),
            "75%分位数": _finite_or_none(predictions.quantile(0.75)),
            "偏度": _finite_or_none(predictions.skew()),
            "峰度": _finite_or_none(predictions.kurtosis()),
        }

        histogram_data = self._calculate_histogram_data(predictions)

        # Spatial extent is only available when both coordinate columns exist.
        spatial_stats = None
        if 'longitude' in df.columns and 'latitude' in df.columns:
            spatial_stats = self._calculate_spatial_statistics(df)

        return {
            "模型类型": "作物Cd模型",
            "县市名称": county_name,
            # File modification time, rendered as a naive local ISO timestamp.
            "数据更新时间": datetime.fromtimestamp(os.path.getmtime(final_data_path)).isoformat(),
            "基础统计": basic_stats,
            "分布直方图": histogram_data,
            "空间统计": spatial_stats,
        }

    except Exception as e:
        # Best-effort endpoint: log and signal "no data" instead of raising.
        self.logger.error(f"获取作物Cd统计信息失败: {str(e)}")
        return None
|
|
|
+
|
|
|
def get_effective_cd_statistics(self, county_name: str) -> Optional[Dict[str, Any]]:
    """
    Build summary statistics for the effective (available) Cd prediction results.

    Reads ``data/final/Final_predictions_effective_cd.csv`` under the
    configured Cd system path and summarises its ``Prediction`` column.
    Values that are missing or not numeric are ignored. A statistic that is
    undefined for the sample (for example the standard deviation of a single
    point) is reported as ``None`` instead of NaN so the result can be
    serialised as strict JSON.

    @param {str} county_name - County/city name; it is only echoed into the result.
    @returns {Optional[Dict[str, Any]]} Statistics dict, or None when the file,
        the 'Prediction' column or every usable value is missing, or when
        reading/processing raises.
    """
    try:
        import math

        def _finite_or_none(value: Any) -> Optional[float]:
            # NaN/inf have no strict-JSON representation; expose them as None.
            number = float(value)
            return number if math.isfinite(number) else None

        # NOTE(review): the path does not depend on county_name, so every
        # county is served from the same file -- confirm this is intended.
        cd_system_path = self.config.get_cd_system_path()
        final_data_path = os.path.join(cd_system_path, "data", "final", "Final_predictions_effective_cd.csv")

        if not os.path.exists(final_data_path):
            self.logger.warning(f"未找到有效态Cd预测结果文件: {final_data_path}")
            return None

        df = pd.read_csv(final_data_path)

        if 'Prediction' not in df.columns:
            self.logger.warning("预测结果文件中缺少'Prediction'列")
            return None

        # Coerce unparsable entries to NaN and drop them so they are neither
        # counted as data points nor allowed to break the histogram step.
        predictions = pd.to_numeric(df['Prediction'], errors='coerce').dropna()

        if predictions.empty:
            self.logger.warning(f"预测结果文件中没有有效的预测值: {final_data_path}")
            return None

        basic_stats = {
            "数据点总数": len(predictions),
            "均值": _finite_or_none(predictions.mean()),
            "中位数": _finite_or_none(predictions.median()),
            "标准差": _finite_or_none(predictions.std()),
            "最小值": _finite_or_none(predictions.min()),
            "最大值": _finite_or_none(predictions.max()),
            "25%分位数": _finite_or_none(predictions.quantile(0.25)),
            "75%分位数": _finite_or_none(predictions.quantile(0.75)),
            "偏度": _finite_or_none(predictions.skew()),
            "峰度": _finite_or_none(predictions.kurtosis()),
        }

        histogram_data = self._calculate_histogram_data(predictions)

        # Spatial extent is only available when both coordinate columns exist.
        spatial_stats = None
        if 'longitude' in df.columns and 'latitude' in df.columns:
            spatial_stats = self._calculate_spatial_statistics(df)

        return {
            "模型类型": "有效态Cd模型",
            "县市名称": county_name,
            # File modification time, rendered as a naive local ISO timestamp.
            "数据更新时间": datetime.fromtimestamp(os.path.getmtime(final_data_path)).isoformat(),
            "基础统计": basic_stats,
            "分布直方图": histogram_data,
            "空间统计": spatial_stats,
        }

    except Exception as e:
        # Best-effort endpoint: log and signal "no data" instead of raising.
        self.logger.error(f"获取有效态Cd统计信息失败: {str(e)}")
        return None
|
|
|
+
|
|
|
def get_combined_statistics(self, county_name: str) -> Optional[Dict[str, Any]]:
    """
    Bundle the crop Cd and effective Cd statistics into a single report.

    @param {str} county_name - County/city name forwarded to both statistic getters.
    @returns {Optional[Dict[str, Any]]} Combined report, or None when neither
        model produced statistics or an unexpected error occurred.
    """
    try:
        # Evaluated in order: crop model first, then effective model.
        per_model = {
            "作物Cd统计": self.get_crop_cd_statistics(county_name),
            "有效态Cd统计": self.get_effective_cd_statistics(county_name),
        }

        # Nothing to report unless at least one model yielded data.
        if not any(per_model.values()):
            return None

        combined: Dict[str, Any] = {"县市名称": county_name}
        combined.update(per_model)
        combined["生成时间"] = datetime.now().isoformat()
        return combined

    except Exception as exc:
        self.logger.error(f"获取综合统计信息失败: {str(exc)}")
        return None
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def get_all_counties_statistics(self) -> Dict[str, Any]:
    """
    Build an availability overview for every supported county.

    For each key of ``self.supported_counties`` this records whether crop Cd
    and effective Cd statistics are available, a short summary (point count,
    mean, max) of each available model, and the most recent data update time
    across the available models. Totals are accumulated under "汇总信息".

    @returns {Dict[str, Any]} Overview dict. On an unexpected error a reduced
        dict containing an "error" message is returned instead of raising.
    """
    try:
        totals = {
            "有作物Cd数据的县市": 0,
            "有有效态Cd数据的县市": 0,
            "数据完整的县市": 0,
        }
        all_stats: Dict[str, Any] = {
            "支持县市总数": len(self.supported_counties),
            "统计生成时间": datetime.now().isoformat(),
            "县市统计": {},
            "汇总信息": totals,
        }

        # (availability flag, summary key, totals key, statistics getter),
        # processed in this order for every county.
        sources = (
            ("有作物Cd数据", "作物Cd概要", "有作物Cd数据的县市", self.get_crop_cd_statistics),
            ("有有效态Cd数据", "有效态Cd概要", "有有效态Cd数据的县市", self.get_effective_cd_statistics),
        )

        for county_name in self.supported_counties:
            county_stats: Dict[str, Any] = {
                "县市名称": county_name,
                "有作物Cd数据": False,
                "有有效态Cd数据": False,
                "数据完整": False,
                "最新更新时间": None,
            }
            update_times = []

            for flag_key, summary_key, total_key, fetch in sources:
                stats = fetch(county_name)
                if not stats:
                    continue
                basic = stats["基础统计"]
                county_stats[flag_key] = True
                county_stats[summary_key] = {
                    "数据点数": basic["数据点总数"],
                    "均值": basic["均值"],
                    "最大值": basic["最大值"],
                }
                totals[total_key] += 1
                updated_at = stats.get("数据更新时间")
                if updated_at is not None:
                    update_times.append(updated_at)

            # The timestamps are same-format ISO-8601 strings, so the
            # lexicographic maximum is also the most recent one.
            if update_times:
                county_stats["最新更新时间"] = max(update_times)

            if county_stats["有作物Cd数据"] and county_stats["有有效态Cd数据"]:
                county_stats["数据完整"] = True
                totals["数据完整的县市"] += 1

            all_stats["县市统计"][county_name] = county_stats

        return all_stats

    except Exception as e:
        self.logger.error(f"获取所有县市统计概览失败: {str(e)}")
        return {
            "error": f"获取统计概览失败: {str(e)}",
            "支持县市总数": len(self.supported_counties),
            "统计生成时间": datetime.now().isoformat(),
        }
|
|
|
+
|
|
|
+ # =============================================================================
|
|
|
+ # 辅助统计方法
|
|
|
+ # =============================================================================
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def _calculate_histogram_data(self, predictions: pd.Series, bins: int = 20) -> Dict[str, Any]:
    """
    Compute histogram data for a series of predicted values.

    Non-finite entries (NaN, +/-inf) are discarded before binning. Otherwise
    ``numpy.histogram`` cannot determine a finite value range and raises,
    which would turn the whole histogram into an empty dict.

    @param {pd.Series} predictions - Predicted values.
    @param {int} bins - Number of histogram bins.
    @returns {Dict[str, Any]} Bin count, per-bin frequencies, bin centres,
        bin edges and the total frequency; an empty dict if the computation fails.
    """
    try:
        import numpy as np

        values = np.asarray(predictions, dtype=float)
        values = values[np.isfinite(values)]

        hist, bin_edges = np.histogram(values, bins=bins)

        # Midpoint of every pair of consecutive edges.
        bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

        return {
            # Report the number of bins actually produced.
            "区间数": int(len(hist)),
            "频次": [int(count) for count in hist],
            "区间中心": [float(center) for center in bin_centers],
            "区间边界": [float(edge) for edge in bin_edges],
            "总频次": int(hist.sum()),
        }

    except Exception as e:
        self.logger.error(f"计算直方图数据失败: {str(e)}")
        return {}
|
|
|
+
|
|
|
def _calculate_spatial_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
    """
    Summarise the coordinate extent of a prediction data frame.

    @param {pd.DataFrame} df - Data frame with 'longitude' and 'latitude' columns.
    @returns {Dict[str, Any]} Minimum, maximum and span for longitude and
        latitude; an empty dict if the computation fails.
    """
    try:
        def _extent(column: str) -> Dict[str, float]:
            # Subtract before converting so the span is computed exactly as
            # the column's own dtype would compute it.
            low = df[column].min()
            high = df[column].max()
            return {
                "最小值": float(low),
                "最大值": float(high),
                "跨度": float(high - low),
            }

        return {
            "经度范围": _extent('longitude'),
            "纬度范围": _extent('latitude'),
        }

    except Exception as exc:
        self.logger.error(f"计算空间统计信息失败: {str(exc)}")
        return {}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|