"""Tune a RandomForestRegressor with an exhaustive grid search and save it.

Pipeline:
1. Load the acidity dataset from an Excel workbook.
2. Hold out 20 % of the rows as a test set.
3. Run a 5-fold cross-validated grid search scored by R².
4. Report train/test R² and test RMSE for the best estimator.
5. Persist the best estimator through ``model_saver.save_model``.
6. Print the total elapsed time.
"""
import os
import time

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split

from model_saver import save_model

# Build paths with os.path.join instead of hard-coded backslashes: the
# original literals contained invalid escape sequences (``\d``, ``\A``,
# ``\p``) and only resolved on Windows. On Windows the resulting strings are
# identical to the original ones. Both paths are relative to the current
# working directory.
DATA_PATH = os.path.join('model_optimize', 'data', 'Acidity_reduce_new.xlsx')
MODEL_DIR = os.path.join('model_optimize', 'pkl')

# Time the whole run (data loading, search, evaluation, saving).
# perf_counter is monotonic, so the measurement is unaffected by system
# clock adjustments.
start = time.perf_counter()

# --- Data loading and train/test split ---
data = pd.read_excel(DATA_PATH)
# The first column is the regression target; every remaining column is a
# feature. Assigning five names requires exactly five feature columns.
x = data.iloc[:, 1:]
y = data.iloc[:, 0]
x.columns = ['pH', 'OM', 'CL', 'H', 'Al']
y.name = 'target'
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# --- Grid-search hyper-parameter tuning ---
# 99 * 11 * 2 * 2 = 4356 candidates, each fitted on 5 folds.
param_grid = {
    # Every integer from 2 through 99, plus 200.
    'n_estimators': list(range(2, 100)) + [200],
    # Unlimited depth (None), every integer from 2 through 10, plus 20.
    'max_depth': [None, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
}
grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=40),
    param_grid=param_grid,
    cv=5,
    # Alternative objective: scoring='neg_mean_squared_error'
    scoring='r2',
    n_jobs=-1,  # use all available CPU cores
    verbose=2,
)
grid_search.fit(Xtrain, Ytrain)

# --- Evaluate the best model ---
best_model = grid_search.best_estimator_
train_pred = best_model.predict(Xtrain)
test_pred = best_model.predict(Xtest)
# RMSE is computed as sqrt(MSE) rather than via the ``squared=False``
# keyword, which was deprecated in scikit-learn 1.4 and removed in 1.6.
test_rmse = mean_squared_error(Ytest, test_pred) ** 0.5
print(f"Best Params: {grid_search.best_params_}")
print(f"Train R²: {r2_score(Ytrain, train_pred):.3f}")
print(f"Test R²: {r2_score(Ytest, test_pred):.3f}")
print(f"Test RMSE: {test_rmse:.3f}")

# --- Persist the model ---
# NOTE(review): save_model is project-local; presumably the arguments are
# (model, output directory, file-name prefix) -- confirm against model_saver.
save_model(best_model, MODEL_DIR, 'rf_model_')

# --- Report elapsed time ---
end = time.perf_counter()
print(f"Time: {end - start:.3f}s")