"""Evaluate a pickled regression model against the filtered data set.

The script loads the pickled model, reads the Excel sheet, predicts the
target (last column) from columns 1-9, prints true and predicted values,
shows a true-vs-predicted scatter plot and an error histogram, and finally
prints the RMSE.
"""
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

# Build paths from components instead of backslash-separated literals:
# '\p', '\R' and '\d' are invalid escape sequences (SyntaxWarning on recent
# Python versions) and a backslash is only a path separator on Windows.
model_path = Path('model_optimize') / 'pkl' / 'RF_filt.pkl'
data_path = Path('model_optimize') / 'data' / 'data_filt.xlsx'

# Fail early with a clear message; otherwise the script would only crash
# later with a NameError when `rfc` is first used.
if not model_path.exists():
    raise FileNotFoundError(f"Model file not found: {model_path}")

# NOTE: pickle.load can execute arbitrary code; only load trusted model files.
with open(model_path, 'rb') as f:
    rfc = pickle.load(f)

# Read the data: columns 1..9 are the features, the last column is the target.
data = pd.read_excel(data_path)
# Copy so that renaming the columns never touches (or warns about) `data`.
x = data.iloc[:, 1:10].copy()
y = data.iloc[:, -1]

# Rename the feature columns.
# NOTE(review): presumably these are the names the model was fitted with —
# confirm against the training script.
x.columns = [
    'organic_matter',    # OM g/kg
    'chloride',          # CL g/kg
    'cec',               # CEC cmol/kg
    'h_concentration',   # H+ cmol/kg
    'hn',                # HN mg/kg
    'al_concentration',  # Al3+ cmol/kg
    'free_alumina',      # Free alumina g/kg
    'free_iron',         # Free iron oxides g/kg
    'delta_ph',          # ΔpH
]

# Predict.
y_pred = rfc.predict(x)

# Show true values next to the predictions.
print('y:',y)
print('y_pred:',y_pred)

# Prediction errors (true minus predicted).
errors = y - y_pred

# Scatter plot of true vs. predicted values.
plt.figure(figsize=(10, 6))
plt.scatter(y, y_pred, color='blue', label='Predictions', alpha=0.5)
# Diagonal reference line: where points would lie for a perfect fit.
plt.plot([y.min(), y.max()], [y.min(), y.max()], color='red', lw=2, label='Perfect fit')
plt.xlabel('True Values')
plt.ylabel('Predicted Values')
plt.title('True vs Predicted Values')
plt.legend()
plt.show()

# Histogram of the prediction errors.
plt.figure(figsize=(10, 6))
plt.hist(errors, bins=20, edgecolor='black', color='lightblue')
# Vertical reference line at zero error.
plt.axvline(x=0, color='red', linestyle='--', lw=2, label='Zero Error Line')
plt.xlabel('Prediction Error')
plt.ylabel('Frequency')
plt.title('Distribution of Prediction Errors')
plt.legend()
plt.show()

# Score: root mean squared error.
rmse = np.sqrt(mean_squared_error(y,y_pred))
print("rmse",rmse)