mapping_utils.py 42 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983
  1. """
  2. 通用绘图工具模块
  3. Universal Mapping and Visualization Utils
  4. 整合了CSV转GeoTIFF、栅格地图绘制和直方图生成功能
  5. 基于01_Transfer_csv_to_geotif.py和02_Figure_raster_mapping.py的更新代码
  6. Author: Integrated from Wanxue Zhu's code
  7. """
  8. import pandas as pd
  9. import geopandas as gpd
  10. from shapely.geometry import Point
  11. import rasterio
  12. from rasterio.features import rasterize, geometry_mask
  13. from rasterio.transform import from_origin
  14. from rasterio.mask import mask
  15. from rasterio.plot import show
  16. from rasterio.warp import transform_bounds, reproject
  17. from rasterio.enums import Resampling
  18. import numpy as np
  19. import os
  20. import json
  21. import logging
  22. from scipy.interpolate import griddata
  23. from scipy.ndimage import distance_transform_edt
  24. import matplotlib.pyplot as plt
  25. from matplotlib.colors import ListedColormap, BoundaryNorm
  26. import seaborn as sns
  27. import warnings
  28. warnings.filterwarnings('ignore')
  29. # 配置日志
  30. from app.log.logger import get_logger
  31. logger = get_logger(__name__)
  32. # 设置matplotlib的中文字体和样式
  33. plt.rcParams['font.family'] = 'Arial'
  34. plt.rcParams['axes.unicode_minus'] = False # 解决负号显示问题
  35. plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei'] # 添加多个中文字体
  36. # 预定义的色彩方案
  37. COLORMAPS = {
  38. 'yellow_orange_brown': ['#FFFECE', '#FFF085', '#FEBA17', '#BE3D2A', '#74512D', '#4E1F00'], # 黄-橙-棕
  39. 'blue_series': ['#F6F8D5', '#98D2C0', '#4F959D', '#205781', '#143D60', '#2A3335'], # 蓝色系
  40. 'yellow_green': ['#FFEFC8', '#F8ED8C', '#D3E671', '#89AC46', '#5F8B4C', '#355F2E'], # 淡黄-草绿
  41. 'green_brown': ['#F0F1C5', '#BBD8A3', '#6F826A', '#BF9264', '#735557', '#604652'], # 绿色-棕色
  42. 'yellow_pink_purple': ['#FCFAEE', '#FBF3B9', '#FFDCCC', '#FDB7EA', '#B7B1F2', '#8D77AB'], # 黄-粉-紫
  43. 'green_yellow_red_purple': ['#15B392', '#73EC8B', '#FFEB55', '#EE66A6', '#D91656', '#640D5F'], # 绿-黄-红-紫
  44. }
  45. class MappingUtils:
  46. """
  47. 通用绘图工具类
  48. 提供CSV转换、栅格处理、地图绘制和直方图生成功能
  49. """
  50. def __init__(self, log_level=logging.INFO):
  51. """
  52. 初始化绘图工具
  53. @param log_level: 日志级别
  54. """
  55. self.logger = logging.getLogger(self.__class__.__name__)
  56. self.logger.setLevel(log_level)
  57. # 避免重复添加处理器,并防止日志传播到父级处理器导致重复输出
  58. if not self.logger.handlers:
  59. handler = logging.StreamHandler()
  60. formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  61. handler.setFormatter(formatter)
  62. self.logger.addHandler(handler)
  63. # 关闭日志传播,避免与全局basicConfig冲突
  64. self.logger.propagate = False
  65. def csv_to_shapefile(self, csv_file, shapefile_output, lon_col=0, lat_col=1, value_col=2):
  66. """
  67. 将CSV文件转换为Shapefile文件
  68. @param csv_file: CSV文件路径
  69. @param shapefile_output: 输出Shapefile文件路径
  70. @param lon_col: 经度列索引或列名,默认第0列
  71. @param lat_col: 纬度列索引或列名,默认第1列
  72. @param value_col: 数值列索引或列名,默认第2列
  73. @return: 输出的shapefile路径
  74. """
  75. try:
  76. self.logger.info(f"开始转换CSV到Shapefile: {csv_file}")
  77. # 读取CSV数据
  78. df = pd.read_csv(csv_file)
  79. # 支持列索引或列名
  80. if isinstance(lon_col, int):
  81. lon = df.iloc[:, lon_col]
  82. else:
  83. lon = df[lon_col]
  84. if isinstance(lat_col, int):
  85. lat = df.iloc[:, lat_col]
  86. else:
  87. lat = df[lat_col]
  88. if isinstance(value_col, int):
  89. val = df.iloc[:, value_col]
  90. else:
  91. val = df[value_col]
  92. # 创建几何对象
  93. geometry = [Point(xy) for xy in zip(lon, lat)]
  94. gdf = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")
  95. # 确保输出目录存在
  96. os.makedirs(os.path.dirname(shapefile_output), exist_ok=True)
  97. # 保存Shapefile
  98. gdf.to_file(shapefile_output, driver="ESRI Shapefile")
  99. self.logger.info(f"✓ 成功转换CSV到Shapefile: {shapefile_output}")
  100. return shapefile_output
  101. except Exception as e:
  102. self.logger.error(f"CSV转Shapefile失败: {str(e)}")
  103. raise
  104. def dataframe_to_geodataframe(self, df, lon_col=0, lat_col=1, value_col=2, field_name='Prediction'):
  105. """
  106. 将DataFrame直接转换为GeoDataFrame(内存处理)
  107. @param df: pandas DataFrame
  108. @param lon_col: 经度列索引或列名,默认第0列
  109. @param lat_col: 纬度列索引或列名,默认第1列
  110. @param value_col: 数值列索引或列名,默认第2列
  111. @param field_name: 值字段名称
  112. @return: GeoDataFrame
  113. """
  114. try:
  115. self.logger.info("开始将DataFrame转换为GeoDataFrame(内存处理)")
  116. # 支持列索引或列名
  117. if isinstance(lon_col, int):
  118. lon = df.iloc[:, lon_col]
  119. else:
  120. lon = df[lon_col]
  121. if isinstance(lat_col, int):
  122. lat = df.iloc[:, lat_col]
  123. else:
  124. lat = df[lat_col]
  125. if isinstance(value_col, int):
  126. val = df.iloc[:, value_col]
  127. else:
  128. val = df[value_col]
  129. # 创建几何对象
  130. geometry = [Point(xy) for xy in zip(lon, lat)]
  131. # 创建新的DataFrame,只包含必要的列
  132. data = {field_name: val}
  133. gdf = gpd.GeoDataFrame(data, geometry=geometry, crs="EPSG:4326")
  134. self.logger.info(f"✓ 成功转换DataFrame到GeoDataFrame: {len(gdf)} 个点")
  135. return gdf
  136. except Exception as e:
  137. self.logger.error(f"DataFrame转GeoDataFrame失败: {str(e)}")
  138. raise
  139. def create_boundary_mask(self, raster, transform, gdf):
  140. """
  141. 创建边界掩膜,只保留边界内的区域
  142. @param raster: 栅格数据
  143. @param transform: 栅格变换参数
  144. @param gdf: 矢量边界数据
  145. @return: 边界掩膜
  146. """
  147. try:
  148. mask = rasterize(
  149. gdf.geometry,
  150. out_shape=raster.shape,
  151. transform=transform,
  152. fill=0,
  153. default_value=1,
  154. dtype=np.uint8
  155. )
  156. return mask.astype(bool)
  157. except Exception as e:
  158. self.logger.error(f"创建边界掩膜失败: {str(e)}")
  159. raise
  160. def interpolate_nan_values(self, raster, method='nearest'):
  161. """
  162. 使用插值方法填充NaN值
  163. @param raster: 包含NaN值的栅格数据
  164. @param method: 插值方法 ('nearest', 'linear', 'cubic')
  165. @return: 插值后的栅格数据
  166. """
  167. try:
  168. if not np.isnan(raster).any():
  169. return raster
  170. # 获取有效值的坐标
  171. valid_mask = ~np.isnan(raster)
  172. valid_coords = np.where(valid_mask)
  173. valid_values = raster[valid_mask]
  174. if len(valid_values) == 0:
  175. self.logger.warning("没有有效值用于插值")
  176. return raster
  177. # 创建网格坐标
  178. rows, cols = raster.shape
  179. grid_x, grid_y = np.mgrid[0:rows, 0:cols]
  180. # 准备插值坐标
  181. points = np.column_stack((valid_coords[0], valid_coords[1]))
  182. # 执行插值
  183. interpolated = griddata(points, valid_values, (grid_x, grid_y),
  184. method=method, fill_value=np.nan)
  185. # 如果插值后仍有NaN值,使用最近邻方法填充
  186. if np.isnan(interpolated).any():
  187. self.logger.info(f"使用 {method} 插值后仍有NaN值,使用最近邻方法填充剩余值")
  188. remaining_nan = np.isnan(interpolated)
  189. remaining_coords = np.where(remaining_nan)
  190. if len(remaining_coords[0]) > 0:
  191. # 使用距离变换找到最近的已知值
  192. dist, indices = distance_transform_edt(remaining_nan,
  193. return_distances=True,
  194. return_indices=True)
  195. # 填充剩余的NaN值
  196. for i, j in zip(remaining_coords[0], remaining_coords[1]):
  197. if indices[0, i, j] < rows and indices[1, i, j] < cols:
  198. interpolated[i, j] = raster[indices[0, i, j], indices[1, i, j]]
  199. return interpolated
  200. except Exception as e:
  201. self.logger.error(f"插值失败: {str(e)}")
  202. return raster
  203. def geodataframe_to_raster(self, gdf, template_tif, output_tif, field,
  204. resolution_factor=16.0, boundary_gdf=None, interpolation_method='nearest', enable_interpolation=True):
  205. """
  206. 将GeoDataFrame直接转换为栅格数据(内存处理版本)
  207. @param gdf: 输入的GeoDataFrame
  208. @param template_tif: 用作模板的GeoTIFF文件路径
  209. @param output_tif: 输出栅格化后的GeoTIFF文件路径
  210. @param field: 用于栅格化的属性字段名
  211. @param resolution_factor: 分辨率倍数因子
  212. @param boundary_gdf: 边界GeoDataFrame,用于创建掩膜
  213. @param interpolation_method: 插值方法 ('nearest', 'linear', 'cubic')
  214. @param enable_interpolation: 是否启用空间插值,默认True
  215. @return: 输出的GeoTIFF文件路径和统计信息
  216. """
  217. try:
  218. self.logger.info(f"开始处理GeoDataFrame到栅格(内存处理)")
  219. interpolation_status = "启用" if enable_interpolation else "禁用"
  220. self.logger.info(f"分辨率因子: {resolution_factor}, 插值设置: {interpolation_status} (方法: {interpolation_method})")
  221. # 读取模板栅格
  222. with rasterio.open(template_tif) as src:
  223. template_meta = src.meta.copy()
  224. # 根据分辨率因子计算新的尺寸和变换参数
  225. if resolution_factor != 1.0:
  226. width = int(src.width * resolution_factor)
  227. height = int(src.height * resolution_factor)
  228. transform = rasterio.Affine(
  229. src.transform.a / resolution_factor,
  230. src.transform.b,
  231. src.transform.c,
  232. src.transform.d,
  233. src.transform.e / resolution_factor,
  234. src.transform.f
  235. )
  236. self.logger.info(f"分辨率调整: {src.width}x{src.height} -> {width}x{height}")
  237. else:
  238. width = src.width
  239. height = src.height
  240. transform = src.transform
  241. self.logger.info(f"保持原始分辨率: {width}x{height}")
  242. crs = src.crs
  243. # 投影矢量数据
  244. if gdf.crs != crs:
  245. gdf = gdf.to_crs(crs)
  246. # 栅格化
  247. shapes = ((geom, value) for geom, value in zip(gdf.geometry, gdf[field]))
  248. raster = rasterize(
  249. shapes=shapes,
  250. out_shape=(height, width),
  251. transform=transform,
  252. fill=np.nan,
  253. dtype='float32'
  254. )
  255. # 预备掩膜:优先使用行政区边界;若未提供边界,则使用点集凸包限制绘制范围
  256. boundary_mask = None
  257. if boundary_gdf is not None:
  258. self.logger.info("应用边界掩膜: 使用直接提供的GeoDataFrame")
  259. # 确保边界GeoDataFrame的CRS与栅格一致
  260. if boundary_gdf.crs != crs:
  261. boundary_gdf = boundary_gdf.to_crs(crs)
  262. boundary_mask = self.create_boundary_mask(raster, transform, boundary_gdf)
  263. raster[~boundary_mask] = np.nan
  264. else:
  265. try:
  266. # 使用点集凸包作为默认掩膜,避免边界外着色
  267. hull = gdf.unary_union.convex_hull
  268. hull_gdf = gpd.GeoDataFrame(geometry=[hull], crs=crs)
  269. boundary_mask = self.create_boundary_mask(raster, transform, hull_gdf)
  270. raster[~boundary_mask] = np.nan
  271. self.logger.info("已使用点集凸包限制绘制范围")
  272. except Exception as hull_err:
  273. self.logger.warning(f"生成点集凸包掩膜失败,可能会出现边界外着色: {str(hull_err)}")
  274. # 检查栅格数据状态并决定是否插值
  275. nan_count = np.isnan(raster).sum()
  276. total_pixels = raster.size
  277. self.logger.info(f"栅格数据状态: 总像素数 {total_pixels}, NaN像素数 {nan_count} ({nan_count/total_pixels*100:.1f}%)")
  278. # 使用插值方法填充NaN值(如果启用)
  279. if enable_interpolation and nan_count > 0:
  280. self.logger.info(f"✓ 启用插值: 使用 {interpolation_method} 方法填充 {nan_count} 个NaN像素...")
  281. raster = self.interpolate_nan_values(raster, method=interpolation_method)
  282. # 关键修正:插值后再次应用掩膜,确保边界外不被填充
  283. if boundary_mask is not None:
  284. raster[~boundary_mask] = np.nan
  285. final_nan_count = np.isnan(raster).sum()
  286. self.logger.info(f"插值完成: 剩余NaN像素数 {final_nan_count}")
  287. elif enable_interpolation and nan_count == 0:
  288. self.logger.info("✓ 插值已启用,但栅格数据无NaN值,无需插值")
  289. elif not enable_interpolation and nan_count > 0:
  290. self.logger.info(f"✗ 插值已禁用,保留 {nan_count} 个NaN像素")
  291. else:
  292. self.logger.info("✓ 栅格数据完整,无需插值")
  293. # 创建输出目录
  294. os.makedirs(os.path.dirname(output_tif), exist_ok=True)
  295. # 更新元数据
  296. template_meta.update({
  297. "count": 1,
  298. "dtype": 'float32',
  299. "nodata": np.nan,
  300. "width": width,
  301. "height": height,
  302. "transform": transform
  303. })
  304. # 保存栅格文件
  305. with rasterio.open(output_tif, 'w', **template_meta) as dst:
  306. dst.write(raster, 1)
  307. # 计算统计信息
  308. valid_data = raster[~np.isnan(raster)]
  309. stats = None
  310. if len(valid_data) > 0:
  311. stats = {
  312. 'min': float(np.min(valid_data)),
  313. 'max': float(np.max(valid_data)),
  314. 'mean': float(np.mean(valid_data)),
  315. 'std': float(np.std(valid_data)),
  316. 'valid_pixels': int(len(valid_data)),
  317. 'total_pixels': int(raster.size)
  318. }
  319. self.logger.info(f"统计信息: 有效像素 {stats['valid_pixels']}/{stats['total_pixels']}")
  320. self.logger.info(f"数值范围: {stats['min']:.4f} - {stats['max']:.4f}")
  321. else:
  322. self.logger.warning("没有有效数据")
  323. self.logger.info(f"✓ 成功保存: {output_tif}")
  324. return output_tif, stats
  325. except Exception as e:
  326. self.logger.error(f"GeoDataFrame转栅格失败: {str(e)}")
  327. raise
  328. def vector_to_raster(self, input_shapefile, template_tif, output_tif, field,
  329. resolution_factor=16.0, boundary_shp=None, boundary_gdf=None, interpolation_method='nearest', enable_interpolation=True):
  330. """
  331. 将点矢量数据转换为栅格数据
  332. @param input_shapefile: 输入点矢量数据的Shapefile文件路径
  333. @param template_tif: 用作模板的GeoTIFF文件路径
  334. @param output_tif: 输出栅格化后的GeoTIFF文件路径
  335. @param field: 用于栅格化的属性字段名
  336. @param resolution_factor: 分辨率倍数因子
  337. @param boundary_shp: 边界Shapefile文件路径,用于创建掩膜(兼容性保留)
  338. @param boundary_gdf: 边界GeoDataFrame,优先使用此参数而非boundary_shp
  339. @param interpolation_method: 插值方法 ('nearest', 'linear', 'cubic')
  340. @param enable_interpolation: 是否启用空间插值,默认True
  341. @return: 输出的GeoTIFF文件路径和统计信息
  342. """
  343. try:
  344. self.logger.info(f"开始处理: {input_shapefile}")
  345. interpolation_status = "启用" if enable_interpolation else "禁用"
  346. self.logger.info(f"分辨率因子: {resolution_factor}, 插值设置: {interpolation_status} (方法: {interpolation_method})")
  347. # 读取矢量数据
  348. gdf = gpd.read_file(input_shapefile)
  349. # 读取模板栅格
  350. with rasterio.open(template_tif) as src:
  351. template_meta = src.meta.copy()
  352. # 根据分辨率因子计算新的尺寸和变换参数
  353. if resolution_factor != 1.0:
  354. width = int(src.width * resolution_factor)
  355. height = int(src.height * resolution_factor)
  356. transform = rasterio.Affine(
  357. src.transform.a / resolution_factor,
  358. src.transform.b,
  359. src.transform.c,
  360. src.transform.d,
  361. src.transform.e / resolution_factor,
  362. src.transform.f
  363. )
  364. self.logger.info(f"分辨率调整: {src.width}x{src.height} -> {width}x{height}")
  365. else:
  366. width = src.width
  367. height = src.height
  368. transform = src.transform
  369. self.logger.info(f"保持原始分辨率: {width}x{height}")
  370. crs = src.crs
  371. # 投影矢量数据
  372. if gdf.crs != crs:
  373. gdf = gdf.to_crs(crs)
  374. # 栅格化
  375. shapes = ((geom, value) for geom, value in zip(gdf.geometry, gdf[field]))
  376. raster = rasterize(
  377. shapes=shapes,
  378. out_shape=(height, width),
  379. transform=transform,
  380. fill=np.nan,
  381. dtype='float32'
  382. )
  383. # 预备掩膜:优先使用行政区边界;若未提供边界,则使用点集凸包限制绘制范围
  384. boundary_mask = None
  385. if boundary_gdf is not None:
  386. self.logger.info("应用边界掩膜: 使用直接提供的GeoDataFrame")
  387. # 确保边界GeoDataFrame的CRS与栅格一致
  388. if boundary_gdf.crs != crs:
  389. boundary_gdf = boundary_gdf.to_crs(crs)
  390. boundary_mask = self.create_boundary_mask(raster, transform, boundary_gdf)
  391. raster[~boundary_mask] = np.nan
  392. elif boundary_shp and os.path.exists(boundary_shp):
  393. self.logger.info(f"应用边界掩膜: {boundary_shp}")
  394. boundary_gdf_from_file = gpd.read_file(boundary_shp)
  395. if boundary_gdf_from_file.crs != crs:
  396. boundary_gdf_from_file = boundary_gdf_from_file.to_crs(crs)
  397. boundary_mask = self.create_boundary_mask(raster, transform, boundary_gdf_from_file)
  398. raster[~boundary_mask] = np.nan
  399. else:
  400. try:
  401. # 使用点集凸包作为默认掩膜,避免边界外着色
  402. hull = gdf.unary_union.convex_hull
  403. hull_gdf = gpd.GeoDataFrame(geometry=[hull], crs=crs)
  404. boundary_mask = self.create_boundary_mask(raster, transform, hull_gdf)
  405. raster[~boundary_mask] = np.nan
  406. self.logger.info("已使用点集凸包限制绘制范围")
  407. except Exception as hull_err:
  408. self.logger.warning(f"生成点集凸包掩膜失败,可能会出现边界外着色: {str(hull_err)}")
  409. # 检查栅格数据状态并决定是否插值
  410. nan_count = np.isnan(raster).sum()
  411. total_pixels = raster.size
  412. self.logger.info(f"栅格数据状态: 总像素数 {total_pixels}, NaN像素数 {nan_count} ({nan_count/total_pixels*100:.1f}%)")
  413. # 使用插值方法填充NaN值(如果启用)
  414. if enable_interpolation and nan_count > 0:
  415. self.logger.info(f"✓ 启用插值: 使用 {interpolation_method} 方法填充 {nan_count} 个NaN像素...")
  416. raster = self.interpolate_nan_values(raster, method=interpolation_method)
  417. # 关键修正:插值后再次应用掩膜,确保边界外不被填充
  418. if boundary_mask is not None:
  419. raster[~boundary_mask] = np.nan
  420. final_nan_count = np.isnan(raster).sum()
  421. self.logger.info(f"插值完成: 剩余NaN像素数 {final_nan_count}")
  422. elif enable_interpolation and nan_count == 0:
  423. self.logger.info("✓ 插值已启用,但栅格数据无NaN值,无需插值")
  424. elif not enable_interpolation and nan_count > 0:
  425. self.logger.info(f"✗ 插值已禁用,保留 {nan_count} 个NaN像素")
  426. else:
  427. self.logger.info("✓ 栅格数据完整,无需插值")
  428. # 创建输出目录
  429. os.makedirs(os.path.dirname(output_tif), exist_ok=True)
  430. # 更新元数据
  431. template_meta.update({
  432. "count": 1,
  433. "dtype": 'float32',
  434. "nodata": np.nan,
  435. "width": width,
  436. "height": height,
  437. "transform": transform
  438. })
  439. # 保存栅格文件
  440. with rasterio.open(output_tif, 'w', **template_meta) as dst:
  441. dst.write(raster, 1)
  442. # 计算统计信息
  443. valid_data = raster[~np.isnan(raster)]
  444. stats = None
  445. if len(valid_data) > 0:
  446. stats = {
  447. 'min': float(np.min(valid_data)),
  448. 'max': float(np.max(valid_data)),
  449. 'mean': float(np.mean(valid_data)),
  450. 'std': float(np.std(valid_data)),
  451. 'valid_pixels': int(len(valid_data)),
  452. 'total_pixels': int(raster.size)
  453. }
  454. self.logger.info(f"统计信息: 有效像素 {stats['valid_pixels']}/{stats['total_pixels']}")
  455. self.logger.info(f"数值范围: {stats['min']:.4f} - {stats['max']:.4f}")
  456. else:
  457. self.logger.warning("没有有效数据")
  458. self.logger.info(f"✓ 成功保存: {output_tif}")
  459. return output_tif, stats
  460. except Exception as e:
  461. self.logger.error(f"矢量转栅格失败: {str(e)}")
  462. raise
  463. def create_raster_map(self, shp_path, tif_path, output_path,
  464. colormap='green_yellow_red_purple', title="Prediction Map",
  465. output_size=12, figsize=None, dpi=300,
  466. resolution_factor=1.0, enable_interpolation=True,
  467. interpolation_method='nearest', boundary_gdf=None):
  468. """
  469. 创建栅格地图
  470. @param shp_path: 输入的矢量数据路径(兼容性保留)
  471. @param tif_path: 输入的栅格数据路径
  472. @param output_path: 输出图片路径(不包含扩展名)
  473. @param colormap: 色彩方案名称或颜色列表
  474. @param title: 图片标题
  475. @param output_size: 图片尺寸(正方形),如果指定了figsize则忽略此参数
  476. @param figsize: 图片尺寸元组 (width, height),优先级高于output_size
  477. @param dpi: 图片分辨率
  478. @param resolution_factor: 分辨率因子,>1提高分辨率,<1降低分辨率
  479. @param enable_interpolation: 是否启用空间插值,用于处理NaN值或提高分辨率,默认True
  480. @param interpolation_method: 插值方法 ('nearest', 'linear', 'cubic')
  481. @param boundary_gdf: 边界GeoDataFrame(可选,优先使用)
  482. @return: 输出图片文件路径
  483. """
  484. try:
  485. self.logger.info(f"开始创建栅格地图: {tif_path}")
  486. self.logger.info(f"分辨率因子: {resolution_factor}, 启用插值: {enable_interpolation}")
  487. # 读取矢量边界:优先使用boundary_gdf,否则从shp_path读取
  488. if boundary_gdf is not None:
  489. gdf = boundary_gdf
  490. self.logger.info("使用直接提供的边界GeoDataFrame")
  491. elif shp_path:
  492. gdf = gpd.read_file(shp_path)
  493. self.logger.info(f"从文件读取边界数据: {shp_path}")
  494. else:
  495. gdf = None
  496. self.logger.info("未提供边界数据,将使用整个栅格范围")
  497. # 读取并裁剪栅格数据
  498. with rasterio.open(tif_path) as src:
  499. original_transform = src.transform
  500. original_crs = src.crs
  501. if gdf is not None:
  502. # 确保坐标系一致
  503. if gdf.crs != src.crs:
  504. gdf = gdf.to_crs(src.crs)
  505. # 裁剪栅格
  506. geoms = [json.loads(gdf.to_json())["features"][0]["geometry"]]
  507. out_image, out_transform = mask(src, geoms, crop=True)
  508. out_meta = src.meta.copy()
  509. else:
  510. # 如果没有边界文件,使用整个栅格
  511. out_image = src.read()
  512. out_transform = src.transform
  513. out_meta = src.meta.copy()
  514. # 提取数据并处理无效值
  515. raster = out_image[0].astype('float32')
  516. nodata = out_meta.get("nodata", None)
  517. if nodata is not None:
  518. raster[raster == nodata] = np.nan
  519. # 应用分辨率因子重采样
  520. if resolution_factor != 1.0:
  521. self.logger.info(f"应用分辨率因子重采样: {resolution_factor}")
  522. raster, out_transform = self._resample_raster(
  523. raster=raster,
  524. transform=out_transform,
  525. resolution_factor=resolution_factor,
  526. crs=original_crs,
  527. resampling='nearest'
  528. )
  529. # 应用空间插值(如果启用)
  530. if enable_interpolation and np.isnan(raster).any():
  531. self.logger.info(f"使用 {interpolation_method} 方法进行空间插值")
  532. raster = self.interpolate_nan_values(raster, method=interpolation_method)
  533. # 检查是否有有效数据
  534. if np.all(np.isnan(raster)):
  535. raise ValueError("栅格数据中没有有效值")
  536. # 检查数据是否为相同值
  537. valid_data = raster[~np.isnan(raster)]
  538. data_min = np.min(valid_data)
  539. data_max = np.max(valid_data)
  540. if data_min == data_max:
  541. # 所有值相同的情况:创建简单的单色映射
  542. self.logger.info(f"检测到所有值相同 ({data_min:.6f}),使用单色映射")
  543. bounds = [data_min - 0.001, data_min + 0.001] # 创建微小的范围
  544. norm = BoundaryNorm(bounds, ncolors=1)
  545. # 使用绿色系的第一个颜色作为单色
  546. if isinstance(colormap, str) and colormap in COLORMAPS:
  547. single_color = COLORMAPS[colormap][0] # 使用色彩方案的第一个颜色
  548. else:
  549. single_color = '#89AC46' # 默认绿色
  550. cmap = ListedColormap([single_color])
  551. else:
  552. # 正常情况:根据分位数分为6个等级
  553. bounds = np.nanpercentile(raster, [0, 20, 40, 60, 80, 90, 100])
  554. norm = BoundaryNorm(bounds, ncolors=len(bounds) - 1)
  555. # 获取色彩方案
  556. if isinstance(colormap, str):
  557. if colormap in COLORMAPS:
  558. color_list = COLORMAPS[colormap]
  559. else:
  560. self.logger.warning(f"未知色彩方案: {colormap},使用默认方案")
  561. color_list = COLORMAPS['green_yellow_red_purple']
  562. else:
  563. color_list = colormap
  564. cmap = ListedColormap(color_list)
  565. # 设置图片尺寸
  566. if figsize is not None:
  567. fig_size = figsize
  568. else:
  569. fig_size = (output_size, output_size)
  570. # 绘图
  571. fig, ax = plt.subplots(figsize=fig_size)
  572. # 如果有边界文件,需要进一步mask边界外的区域;否则使用栅格有效范围
  573. if gdf is not None:
  574. try:
  575. height, width = raster.shape
  576. transform = out_transform
  577. geom_mask = geometry_mask(
  578. [json.loads(gdf.to_json())["features"][0]["geometry"]],
  579. out_shape=(height, width),
  580. transform=transform,
  581. invert=True
  582. )
  583. raster = np.where(geom_mask, raster, np.nan)
  584. except Exception as mask_err:
  585. self.logger.warning(f"边界掩膜应用失败,将继续绘制已裁剪栅格: {str(mask_err)}")
  586. # 显示栅格数据
  587. show(raster, transform=out_transform, ax=ax, cmap=cmap, norm=norm)
  588. # 添加矢量边界
  589. if gdf is not None:
  590. gdf.boundary.plot(ax=ax, color='black', linewidth=1)
  591. # 设置标题和标签
  592. ax.set_title(title, fontsize=20)
  593. ax.set_xlabel("Longitude", fontsize=18)
  594. ax.set_ylabel("Latitude", fontsize=18)
  595. ax.grid(True, linestyle='--', color='gray', alpha=0.5)
  596. ax.tick_params(axis='y', labelrotation=90)
  597. # 添加色带
  598. if data_min == data_max:
  599. # 单值情况:简化的色带
  600. cbar = plt.colorbar(
  601. plt.cm.ScalarMappable(norm=norm, cmap=cmap),
  602. ax=ax,
  603. ticks=[data_min],
  604. shrink=0.6,
  605. aspect=15
  606. )
  607. cbar.ax.set_yticklabels([f"{data_min:.6f}"])
  608. cbar.set_label("Fixed Value")
  609. else:
  610. # 正常情况:分级色带
  611. tick_labels = [f"{bounds[i]:.1f}" for i in range(len(bounds) - 1)]
  612. cbar = plt.colorbar(
  613. plt.cm.ScalarMappable(norm=norm, cmap=cmap),
  614. ax=ax,
  615. ticks=[(bounds[i] + bounds[i+1]) / 2 for i in range(len(bounds) - 1)],
  616. shrink=0.6,
  617. aspect=15
  618. )
  619. cbar.ax.set_yticklabels(tick_labels)
  620. cbar.set_label("Values")
  621. plt.tight_layout()
  622. # 确保输出目录存在
  623. os.makedirs(os.path.dirname(output_path), exist_ok=True)
  624. # 保存图片
  625. output_file = f"{output_path}.jpg"
  626. plt.savefig(output_file, dpi=dpi, format='jpg', bbox_inches='tight')
  627. plt.close()
  628. self.logger.info(f"✓ 栅格地图创建成功: {output_file}")
  629. return output_file
  630. except Exception as e:
  631. self.logger.error(f"栅格地图创建失败: {str(e)}")
  632. raise
  633. def _resample_raster(self, raster, transform, resolution_factor: float, crs, resampling: str = 'nearest'):
  634. """
  635. 按分辨率因子对二维栅格进行重采样,并返回新栅格与更新后的仿射变换
  636. @param raster: 2D numpy 数组
  637. @param transform: 输入栅格的仿射变换
  638. @param resolution_factor: 分辨率因子 (>1 增加像元密度)
  639. @param crs: 坐标参考系
  640. @param resampling: 重采样方式 ('nearest' | 'bilinear' | 'cubic')
  641. @return: (resampled_raster, new_transform)
  642. """
  643. try:
  644. if resolution_factor == 1.0:
  645. return raster, transform
  646. rows, cols = raster.shape
  647. new_rows = max(1, int(rows * resolution_factor))
  648. new_cols = max(1, int(cols * resolution_factor))
  649. # 更新变换(像元变小)
  650. new_transform = rasterio.Affine(
  651. transform.a / resolution_factor,
  652. transform.b,
  653. transform.c,
  654. transform.d,
  655. transform.e / resolution_factor,
  656. transform.f
  657. )
  658. # 选择重采样算法
  659. resampling_map = {
  660. 'nearest': Resampling.nearest,
  661. 'bilinear': Resampling.bilinear,
  662. 'cubic': Resampling.cubic,
  663. }
  664. resampling_enum = resampling_map.get(resampling, Resampling.nearest)
  665. destination = np.full((new_rows, new_cols), np.nan, dtype='float32')
  666. # 使用 reproject 做重采样(坐标系不变,仅分辨率变化)
  667. reproject(
  668. source=raster,
  669. destination=destination,
  670. src_transform=transform,
  671. src_crs=crs,
  672. dst_transform=new_transform,
  673. dst_crs=crs,
  674. src_nodata=np.nan,
  675. dst_nodata=np.nan,
  676. resampling=resampling_enum
  677. )
  678. return destination, new_transform
  679. except Exception as e:
  680. self.logger.error(f"重采样失败: {str(e)}")
  681. # 失败则返回原始数据,避免中断
  682. return raster, transform
  683. def create_histogram(self, file_path, save_path=None, figsize=(10, 6),
  684. xlabel='像元值', ylabel='频率密度', title='数值分布图',
  685. bins=100, dpi=300):
  686. """
  687. 绘制GeoTIFF文件的直方图
  688. @param file_path: GeoTIFF文件路径
  689. @param save_path: 保存路径,如果为None则自动生成
  690. @param figsize: 图像尺寸
  691. @param xlabel: 横坐标标签
  692. @param ylabel: 纵坐标标签
  693. @param title: 图标题
  694. @param bins: 直方图箱数
  695. @param dpi: 图片分辨率
  696. @return: 输出图片文件路径
  697. """
  698. try:
  699. self.logger.info(f"开始创建直方图: {file_path}")
  700. # 设置seaborn样式
  701. sns.set(style='ticks')
  702. # 读取栅格数据
  703. with rasterio.open(file_path) as src:
  704. band = src.read(1)
  705. nodata = src.nodata
  706. # 处理无效值
  707. if nodata is not None:
  708. band = np.where(band == nodata, np.nan, band)
  709. # 展平数据并移除NaN值
  710. band_flat = band.flatten()
  711. band_flat = band_flat[~np.isnan(band_flat)]
  712. if len(band_flat) == 0:
  713. raise ValueError("栅格数据中没有有效值")
  714. # 检查是否所有值相同
  715. data_min = np.min(band_flat)
  716. data_max = np.max(band_flat)
  717. # 创建图形
  718. plt.figure(figsize=figsize)
  719. if data_min == data_max:
  720. # 所有值相同:创建特殊的单值直方图
  721. self.logger.info(f"检测到所有值相同 ({data_min:.6f}),创建单值直方图")
  722. plt.bar([data_min], [len(band_flat)], width=0.1*abs(data_min) if data_min != 0 else 0.1,
  723. color='steelblue', alpha=0.7, edgecolor='black')
  724. plt.axvline(x=data_min, color='red', linewidth=2, linestyle='--',
  725. label=f'Fixed Value: {data_min:.6f}')
  726. plt.legend()
  727. else:
  728. # 正常情况:绘制直方图和密度曲线
  729. sns.histplot(band_flat, bins=bins, color='steelblue', alpha=0.7,
  730. edgecolor='black', stat='density')
  731. sns.kdeplot(band_flat, color='red', linewidth=2)
  732. # 设置标签和标题
  733. plt.xlabel(xlabel, fontsize=14)
  734. plt.ylabel(ylabel, fontsize=14)
  735. plt.title(title, fontsize=16)
  736. plt.grid(True, linestyle='--', alpha=0.5)
  737. plt.tight_layout()
  738. # 保存图片
  739. if save_path is None:
  740. save_path = file_path.replace('.tif', '_histogram.jpg')
  741. # 确保输出目录存在
  742. os.makedirs(os.path.dirname(save_path), exist_ok=True)
  743. plt.savefig(save_path, dpi=dpi, format='jpg', bbox_inches='tight')
  744. plt.close()
  745. self.logger.info(f"✓ 直方图创建成功: {save_path}")
  746. return save_path
  747. except Exception as e:
  748. self.logger.error(f"直方图创建失败: {str(e)}")
  749. raise
  750. def get_available_colormaps():
  751. """
  752. 获取可用的色彩方案列表
  753. @return: 色彩方案字典
  754. """
  755. return COLORMAPS.copy()
  756. def dataframe_to_raster_workflow(df, template_tif, output_dir,
  757. boundary_gdf=None, resolution_factor=16.0,
  758. interpolation_method='nearest', field_name='Prediction',
  759. lon_col=0, lat_col=1, value_col=2, enable_interpolation=False):
  760. """
  761. DataFrame到栅格转换工作流(内存处理优化版本)
  762. @param df: pandas DataFrame
  763. @param template_tif: 模板GeoTIFF文件路径
  764. @param output_dir: 输出目录
  765. @param boundary_gdf: 边界GeoDataFrame(可选)
  766. @param resolution_factor: 分辨率因子
  767. @param interpolation_method: 插值方法
  768. @param field_name: 字段名称
  769. @param lon_col: 经度列
  770. @param lat_col: 纬度列
  771. @param value_col: 数值列
  772. @param enable_interpolation: 是否启用空间插值,默认False
  773. @return: 输出文件路径字典
  774. """
  775. mapper = MappingUtils()
  776. # 确保输出目录存在
  777. os.makedirs(output_dir, exist_ok=True)
  778. # 生成文件名(基于时间戳,避免冲突)
  779. import time
  780. timestamp = str(int(time.time()))
  781. raster_path = os.path.join(output_dir, f"memory_raster_{timestamp}.tif")
  782. # 1. DataFrame直接转GeoDataFrame(内存处理)
  783. gdf = mapper.dataframe_to_geodataframe(df, lon_col, lat_col, value_col, field_name)
  784. # 2. GeoDataFrame直接转栅格(内存处理)
  785. raster_path, stats = mapper.geodataframe_to_raster(
  786. gdf, template_tif, raster_path, field_name,
  787. resolution_factor, boundary_gdf, interpolation_method, enable_interpolation
  788. )
  789. return {
  790. 'shapefile': None, # 内存处理不生成shapefile
  791. 'raster': raster_path,
  792. 'statistics': stats,
  793. 'geodataframe': gdf # 返回GeoDataFrame供调试使用
  794. }
  795. def csv_to_raster_workflow(csv_file, template_tif, output_dir,
  796. boundary_shp=None, boundary_gdf=None, resolution_factor=16.0,
  797. interpolation_method='nearest', field_name='Prediction',
  798. lon_col=0, lat_col=1, value_col=2, enable_interpolation=False):
  799. """
  800. 完整的CSV到栅格转换工作流(原版本,保持兼容性)
  801. @param csv_file: CSV文件路径
  802. @param template_tif: 模板GeoTIFF文件路径
  803. @param output_dir: 输出目录
  804. @param boundary_shp: 边界Shapefile文件路径(可选,兼容性保留)
  805. @param boundary_gdf: 边界GeoDataFrame(可选,优先使用)
  806. @param resolution_factor: 分辨率因子
  807. @param interpolation_method: 插值方法
  808. @param field_name: 字段名称
  809. @param lon_col: 经度列
  810. @param lat_col: 纬度列
  811. @param value_col: 数值列
  812. @param enable_interpolation: 是否启用空间插值,默认False
  813. @return: 输出文件路径字典
  814. """
  815. mapper = MappingUtils()
  816. # 确保输出目录存在
  817. os.makedirs(output_dir, exist_ok=True)
  818. # 生成文件名
  819. base_name = os.path.splitext(os.path.basename(csv_file))[0]
  820. shapefile_path = os.path.join(output_dir, f"{base_name}_points.shp")
  821. raster_path = os.path.join(output_dir, f"{base_name}_raster.tif")
  822. # 1. CSV转Shapefile
  823. mapper.csv_to_shapefile(csv_file, shapefile_path, lon_col, lat_col, value_col)
  824. # 2. Shapefile转栅格
  825. raster_path, stats = mapper.vector_to_raster(
  826. shapefile_path, template_tif, raster_path, field_name,
  827. resolution_factor, boundary_shp, boundary_gdf, interpolation_method, enable_interpolation
  828. )
  829. return {
  830. 'shapefile': shapefile_path,
  831. 'raster': raster_path,
  832. 'statistics': stats
  833. }