1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
"""Tune a RandomForestRegressor with an exhaustive grid search and save it.

The script reads the acidity data set from an Excel workbook, holds out 20%
of the rows for testing, runs a 5-fold cross-validated grid search scored by
R², prints train/test metrics for the best estimator, hands that estimator
to ``save_model``, and finally prints the total elapsed time.
"""
import time

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error, r2_score

from model_saver import save_model

# Start timing the whole tuning run; perf_counter is monotonic, so the
# measurement is not affected by system clock adjustments.
start = time.perf_counter()

# Data loading and splitting.
# Raw string: the Windows-style path contains "\d" and "\A", which are
# invalid escape sequences in a normal string literal. The runtime value is
# unchanged.
data = pd.read_excel(r'model_optimize\data\Acidity_reduce_new.xlsx')
# First column is the regression target, the remaining columns are features.
x = data.iloc[:, 1:]
y = data.iloc[:, 0]
# Assigning five names requires the sheet to have exactly five feature
# columns; pandas raises otherwise.
x.columns = ['pH', 'OM', 'CL', 'H', 'Al']
y.name = 'target'
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Grid search hyper-parameter tuning.
# NOTE: 99 * 11 * 2 * 2 = 4356 candidates, each fitted on 5 folds.
param_grid = {
    # Every value from 2 through 99, plus 200.
    'n_estimators': list(range(2, 100)) + [200],
    # None (no depth limit), every value from 2 through 10, plus 20.
    'max_depth': [None, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
}
grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=40),
    param_grid=param_grid,
    cv=5,
    # Candidates are ranked by R²; 'neg_mean_squared_error' is the
    # alternative metric that was tried here previously.
    scoring='r2',
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(Xtrain, Ytrain)

# Evaluate the best model on both the training and the held-out split.
best_model = grid_search.best_estimator_
train_pred = best_model.predict(Xtrain)
test_pred = best_model.predict(Xtest)
# RMSE is computed as sqrt(MSE) rather than with `squared=False`, which is
# deprecated in scikit-learn 1.4 and removed in 1.6.
test_rmse = mean_squared_error(Ytest, test_pred) ** 0.5
print(f"Best Params: {grid_search.best_params_}")
print(f"Train R²: {r2_score(Ytrain, train_pred):.3f}")
print(f"Test R²: {r2_score(Ytest, test_pred):.3f}")
print(f"Test RMSE: {test_rmse:.3f}")

# Persist the model.
# NOTE(review): the arguments look like (model, output directory, filename
# prefix) — confirm against model_saver.save_model.
# Raw string for the same reason as the data path ("\p" is an invalid escape).
save_model(best_model, r'model_optimize\pkl', 'rf_model_')

# Report the total elapsed time.
end = time.perf_counter()
print(f"Time: {end - start:.3f}s")
|