12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
# Evaluate a pickled regression model against the filtered data set:
# load the model and the Excel sheet, predict the target from nine feature
# columns, print and plot the results, and report the RMSE.
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

# Build paths from components instead of backslash literals: sequences such
# as "\p", "\R" and "\d" are invalid string escapes, and a backslash is only
# a path separator on Windows.
model_path = Path('model_optimize') / 'pkl' / 'RF_filt.pkl'

# Open the file directly so that a missing model fails here with
# FileNotFoundError instead of leaving `rfc` undefined and crashing later
# with a NameError at prediction time.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(model_path, 'rb') as f:
    rfc = pickle.load(f)

# Read the data.
data_path = Path('model_optimize') / 'data' / 'data_filt.xlsx'
data = pd.read_excel(data_path)

# Columns 1..9 are used as features and the last column as the target.
# Copy the slice so that renaming its columns never touches `data`.
x = data.iloc[:, 1:10].copy()
y = data.iloc[:, -1]

# Rename the feature columns.
# NOTE(review): presumably these must match the feature names the model was
# fitted with -- confirm against the training script.
x.columns = [
    'organic_matter',    # OM g/kg
    'chloride',          # CL g/kg
    'cec',               # CEC cmol/kg
    'h_concentration',   # H+ cmol/kg
    'hn',                # HN mg/kg
    'al_concentration',  # Al3+ cmol/kg
    'free_alumina',      # Free alumina g/kg
    'free_iron',         # Free iron oxides g/kg
    'delta_ph',          # ΔpH
]

# Predict.
y_pred = rfc.predict(x)

# Show true values next to the predictions.
print('y:', y)
print('y_pred:', y_pred)

# Prediction error per sample.
errors = y - y_pred

# Scatter plot of true versus predicted values.
plt.figure(figsize=(10, 6))
plt.scatter(y, y_pred, color='blue', label='Predictions', alpha=0.5)
# Diagonal on which a perfect prediction would lie.
plt.plot(
    [y.min(), y.max()],
    [y.min(), y.max()],
    color='red',
    lw=2,
    label='Perfect fit',
)
plt.xlabel('True Values')
plt.ylabel('Predicted Values')
plt.title('True vs Predicted Values')
plt.legend()
plt.show()

# Histogram of the prediction errors.
plt.figure(figsize=(10, 6))
plt.hist(errors, bins=20, edgecolor='black', color='lightblue')
# Vertical marker at zero error.
plt.axvline(x=0, color='red', linestyle='--', lw=2, label='Zero Error Line')
plt.xlabel('Prediction Error')
plt.ylabel('Frequency')
plt.title('Distribution of Prediction Errors')
plt.legend()
plt.show()

# Score: root mean squared error.
rmse = np.sqrt(mean_squared_error(y, y_pred))
print("rmse", rmse)
|