import pickle
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from pathlib import Path

# Location of the pickled model, relative to the current working directory.
# The components are joined with pathlib instead of being written with
# backslashes: '\p' and '\R' are invalid escape sequences in a normal string
# literal, and a backslash is only a path separator on Windows.
model_path = Path('model_optimize') / 'pkl' / 'RF_filt.pkl'

# Fail fast with a clear message. Skipping the load silently would leave
# `rfc` undefined and surface later as a confusing NameError.
if not model_path.is_file():
    raise FileNotFoundError(f"Model file not found: {model_path.resolve()}")

# NOTE: unpickling can execute arbitrary code -- only load model files that
# come from a trusted source.
with model_path.open('rb') as f:
    rfc = pickle.load(f)


# Read the data set.
# The path is built with pathlib joins rather than backslashes: '\d' is an
# invalid escape sequence in a normal string literal, and a backslash is only
# a path separator on Windows.
data_path = Path('model_optimize') / 'data' / 'data_filt.xlsx'
data = pd.read_excel(data_path)


# Features are the columns at positions 1..9; the target is the last column.
# .copy() gives `x` its own frame so that renaming its columns below can
# never affect `data`.
x = data.iloc[:, 1:10].copy()
y = data.iloc[:, -1]

# Rename the feature columns (positional: exactly nine names are required,
# otherwise pandas raises a length-mismatch ValueError).
# NOTE(review): presumably these are the feature names the model was trained
# with -- confirm against the training script.
x.columns = [
    'organic_matter',        # OM g/kg
    'chloride',              # CL g/kg
    'cec',                   # CEC cmol/kg
    'h_concentration',       # H+ cmol/kg
    'hn',                    # HN mg/kg
    'al_concentration',      # Al3+ cmol/kg
    'free_alumina',          # Free alumina g/kg
    'free_iron',             # Free iron oxides g/kg
    'delta_ph'               # ΔpH
]


# Run the loaded model on the feature table.
y_pred = rfc.predict(x)

# Print the true targets and the predictions so they can be compared.
for label, values in (('y:', y), ('y_pred:', y_pred)):
    print(label, values)

# Prediction error: true value minus predicted value.
errors = y - y_pred

# Visualisation
import matplotlib.pyplot as plt

# Scatter plot: true values on the x-axis, predictions on the y-axis.
_, scatter_ax = plt.subplots(figsize=(10, 6))
scatter_ax.scatter(y, y_pred, color='blue', label='Predictions', alpha=0.5)
# Diagonal reference line spanning the range of the true values; a point on
# this line is a prediction equal to its true value.
diagonal = [y.min(), y.max()]
scatter_ax.plot(diagonal, diagonal, color='red', lw=2, label='Perfect fit')
scatter_ax.set_xlabel('True Values')
scatter_ax.set_ylabel('Predicted Values')
scatter_ax.set_title('True vs Predicted Values')
scatter_ax.legend()
plt.show()

# Histogram of the prediction errors.
_, hist_ax = plt.subplots(figsize=(10, 6))
hist_ax.hist(errors, bins=20, edgecolor='black', color='lightblue')
# Dashed vertical marker at zero error.
hist_ax.axvline(x=0, color='red', linestyle='--', lw=2, label='Zero Error Line')
hist_ax.set_xlabel('Prediction Error')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Distribution of Prediction Errors')
hist_ax.legend()
plt.show()

# Score: root-mean-squared error between the true values and the predictions.
mse = mean_squared_error(y, y_pred)
rmse = np.sqrt(mse)
print("rmse", rmse)