model_predict.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. import pickle
  2. import pandas as pd
  3. import numpy as np
  4. from sklearn.metrics import mean_squared_error
  5. from pathlib import Path
  6. model_path = Path('model_optimize\pkl\RF_filt.pkl')
  7. # 确保路径存在
  8. if model_path.exists():
  9. with open(model_path, 'rb') as f:
  10. rfc = pickle.load(f)
  11. # 读取数据
  12. data_path = Path('model_optimize\data\data_filt.xlsx')
  13. data=pd.read_excel(data_path)
  14. x = data.iloc[:,1:10]
  15. y = data.iloc[:,-1]
  16. # 转换列名
  17. x.columns = [
  18. 'organic_matter', # OM g/kg
  19. 'chloride', # CL g/kg
  20. 'cec', # CEC cmol/kg
  21. 'h_concentration', # H+ cmol/kg
  22. 'hn', # HN mg/kg
  23. 'al_concentration', # Al3+ cmol/kg
  24. 'free_alumina', # Free alumina g/kg
  25. 'free_iron', # Free iron oxides g/kg
  26. 'delta_ph' # ΔpH
  27. ]
  28. # 预测
  29. y_pred = rfc.predict(x)
  30. # y 与 y_pred 的对比
  31. print('y:',y)
  32. print('y_pred:',y_pred)
  33. # 计算预测误差
  34. errors = y - y_pred
  35. # 图示
  36. import matplotlib.pyplot as plt
  37. # 绘制散点图
  38. plt.figure(figsize=(10, 6))
  39. plt.scatter(y, y_pred, color='blue', label='Predictions', alpha=0.5)
  40. plt.plot([y.min(), y.max()], [y.min(), y.max()], color='red', lw=2, label='Perfect fit') # 理想的完美拟合线
  41. plt.xlabel('True Values')
  42. plt.ylabel('Predicted Values')
  43. plt.title('True vs Predicted Values')
  44. plt.legend()
  45. plt.show()
  46. # 绘制误差的直方图
  47. plt.figure(figsize=(10, 6))
  48. plt.hist(errors, bins=20, edgecolor='black', color='lightblue')
  49. plt.axvline(x=0, color='red', linestyle='--', lw=2, label='Zero Error Line') # 添加零误差线
  50. plt.xlabel('Prediction Error')
  51. plt.ylabel('Frequency')
  52. plt.title('Distribution of Prediction Errors')
  53. plt.legend()
  54. plt.show()
  55. # 评分
  56. rmse = np.sqrt(mean_squared_error(y,y_pred))
  57. print("rmse",rmse)