# RF_opt.py (1.6 KB)

  1. import pandas as pd
  2. from sklearn.ensemble import RandomForestRegressor
  3. from sklearn.model_selection import GridSearchCV, train_test_split
  4. from sklearn.metrics import mean_squared_error, r2_score
  5. from model_saver import save_model
  6. # 统计筛选时间
  7. import time
  8. start = time.time()
  9. # 数据加载与划分
  10. data = pd.read_excel('model_optimize\data\Acidity_reduce_new.xlsx')
  11. x = data.iloc[:,1:]
  12. y = data.iloc[:,0]
  13. x.columns = ['pH', 'OM', 'CL', 'H', 'Al']
  14. y.name = 'target'
  15. Xtrain, Xtest, Ytrain, Ytest = train_test_split(x, y, test_size=0.2, random_state=42)
  16. # 网格搜索调参
  17. param_grid = {
  18. # 遍历2至50,以及100
  19. 'n_estimators': [i for i in range(2, 100)] + [200],
  20. # 遍历2至10,以及20
  21. 'max_depth': [None, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20],
  22. 'min_samples_split': [2, 5],
  23. 'min_samples_leaf': [1, 2],
  24. }
  25. grid_search = GridSearchCV(
  26. estimator=RandomForestRegressor(random_state=40),
  27. param_grid=param_grid,
  28. cv=5,
  29. # scoring='neg_mean_squared_error',
  30. scoring='r2',
  31. n_jobs=-1,
  32. verbose=2
  33. )
  34. grid_search.fit(Xtrain, Ytrain)
  35. # 最优模型评估
  36. best_model = grid_search.best_estimator_
  37. train_pred = best_model.predict(Xtrain)
  38. test_pred = best_model.predict(Xtest)
  39. print(f"Best Params: {grid_search.best_params_}")
  40. print(f"Train R²: {r2_score(Ytrain, train_pred):.3f}")
  41. print(f"Test R²: {r2_score(Ytest, test_pred):.3f}")
  42. print(f"Test RMSE: {mean_squared_error(Ytest, test_pred, squared=False):.3f}")
  43. # 模型保存
  44. save_model(best_model, 'model_optimize\pkl', 'rf_model_')
  45. # 统计筛选时间
  46. end = time.time()
  47. print(f"Time: {end - start:.3f}s")