"""Evaluate random-forest regressors on the filtered data set.

The script loads ``data_filt.xlsx``, takes columns 1-9 as features and the
last column as the target, holds out 20 % of the rows as a test set, and then
reports 5-fold cross-validation and hold-out metrics for:

1. a previously pickled model (``RF_filt.pkl``), if that file exists, and
2. a fresh ``RandomForestRegressor(random_state=1)``.
"""

## Common base packages
import os
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from PIL import Image

# Machine-learning model
from sklearn.ensemble import RandomForestRegressor

## Scoring functions
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import roc_auc_score

## Model-selection helpers
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

## Data preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

from model_saver import save_model


def _evaluate_and_report(model, x_train, x_test, y_train, y_test, cv_label):
    """Cross-validate *model*, refit it on the training split, print metrics.

    Args:
        model: Estimator passed to ``cross_val_score`` / ``cross_val_predict``
            and then fitted in place on the training split.
        x_train: Training features.
        x_test: Hold-out features.
        y_train: Training target.
        y_test: Hold-out target.
        cv_label: Text printed in front of the mean cross-validation score.

    Returns:
        Tuple ``(cv_score, cv_predictions, rmse_cv, fitted, test_predictions,
        score_test, rmse_test)``.
    """
    # Mean of the estimator's default score over 5 folds of the training split.
    cv_score = cross_val_score(model, x_train, y_train, cv=5).mean()
    # Out-of-fold predictions, used for the cross-validated RMSE.
    cv_predictions = cross_val_predict(model, x_train, y_train, cv=5)
    rmse_cv = np.sqrt(mean_squared_error(y_train, cv_predictions))

    # Refit on the full training split before scoring the hold-out set.
    fitted = model.fit(x_train, y_train)
    test_predictions = fitted.predict(x_test)
    score_test = fitted.score(x_test, y_test)
    rmse_test = np.sqrt(mean_squared_error(y_test, test_predictions))

    print(cv_label, cv_score)
    print("rmse_5CV", rmse_cv)
    print("R2_test:", score_test)
    print("rmse_test", rmse_test)

    return (
        cv_score,
        cv_predictions,
        rmse_cv,
        fitted,
        test_predictions,
        score_test,
        rmse_test,
    )


# Load the data.
# The path is built from components instead of a backslash literal: "\d" is an
# invalid escape sequence in a normal string and the backslash form only
# resolves as a path on Windows.
data = pd.read_excel(Path('model_optimize') / 'data' / 'data_filt.xlsx')

# Copy the slice so the renaming below never touches ``data`` itself.
x = data.iloc[:, 1:10].copy()
y = data.iloc[:, -1].copy()

# Assign column names to x.
x.columns = [
    'organic_matter',    # OM g/kg
    'chloride',          # CL g/kg
    'cec',               # CEC cmol/kg
    'h_concentration',   # H+ cmol/kg
    'hn',                # HN mg/kg
    'al_concentration',  # Al3+ cmol/kg
    'free_alumina',      # Free alumina g/kg
    'free_iron',         # Free iron oxides g/kg
    'delta_ph',          # ΔpH
]
y.name = 'target_ph'

Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    x, y, test_size=0.2, random_state=42
)

## Original (previously saved) model
model_path = Path('model_optimize') / 'pkl' / 'RF_filt.pkl'

# Only evaluate the saved model when the pickle is actually present; without
# this guard a missing file would leave ``rfc`` undefined below.
if model_path.exists():
    # NOTE: pickle.load executes arbitrary code from the file; only load
    # pickles produced by a trusted source.
    with open(model_path, 'rb') as f:
        rfc = pickle.load(f)

    (
        CV_score,
        CV_predictions,
        rmse1,
        regressor,
        test_predictions,
        score_test,
        rmse2,
    ) = _evaluate_and_report(rfc, Xtrain, Xtest, Ytrain, Ytest, "5cv_score:")
else:
    print(f"Saved model not found at {model_path}; skipping its evaluation.")

# Train and evaluate a model with fixed parameters (defaults, seeded).
rfc = RandomForestRegressor(random_state=1)
(
    CV_score,
    CV_predictions,
    rmse1,
    regressor,
    test_predictions,
    score_test,
    rmse2,
) = _evaluate_and_report(rfc, Xtrain, Xtest, Ytrain, Ytest, "5cv:")